diff --git "a/1.\345\205\250\346\260\221\347\230\213AI\347\263\273\345\210\2272.0\347\233\256\346\250\231\344\273\213\347\264\271/index.html" "b/1.\345\205\250\346\260\221\347\230\213AI\347\263\273\345\210\2272.0\347\233\256\346\250\231\344\273\213\347\264\271/index.html" index 8ec638f..2cf05c7 100644 --- "a/1.\345\205\250\346\260\221\347\230\213AI\347\263\273\345\210\2272.0\347\233\256\346\250\231\344\273\213\347\264\271/index.html" +++ "b/1.\345\205\250\346\260\221\347\230\213AI\347\263\273\345\210\2272.0\347\233\256\346\250\231\344\273\213\347\264\271/index.html" @@ -91,7 +91,7 @@
- +
@@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
- +
diff --git a/10.KNN/index.html b/10.KNN/index.html index c7ab50b..6c23511 100644 --- a/10.KNN/index.html +++ b/10.KNN/index.html @@ -91,7 +91,7 @@
- +
@@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
- +
diff --git a/11.SVM/index.html b/11.SVM/index.html index 66a11b0..f902313 100644 --- a/11.SVM/index.html +++ b/11.SVM/index.html @@ -91,7 +91,7 @@
- +
@@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
- +
diff --git "a/12.\346\261\272\347\255\226\346\250\271/index.html" "b/12.\346\261\272\347\255\226\346\250\271/index.html" index edb5772..e54fa24 100644 --- "a/12.\346\261\272\347\255\226\346\250\271/index.html" +++ "b/12.\346\261\272\347\255\226\346\250\271/index.html" @@ -91,7 +91,7 @@
- +
@@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
- +
diff --git "a/13.\346\225\264\351\253\224\345\255\270\347\277\222/index.html" "b/13.\346\225\264\351\253\224\345\255\270\347\277\222/index.html" index 014f00f..d750ab7 100644 --- "a/13.\346\225\264\351\253\224\345\255\270\347\277\222/index.html" +++ "b/13.\346\225\264\351\253\224\345\255\270\347\277\222/index.html" @@ -91,7 +91,7 @@
- +
@@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
- +
diff --git "a/14.\351\232\250\346\251\237\346\243\256\346\236\227/index.html" "b/14.\351\232\250\346\251\237\346\243\256\346\236\227/index.html" index 8e992a9..5546c37 100644 --- "a/14.\351\232\250\346\251\237\346\243\256\346\236\227/index.html" +++ "b/14.\351\232\250\346\251\237\346\243\256\346\236\227/index.html" @@ -91,7 +91,7 @@
- +
@@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
- +
diff --git a/15.XGBoost/index.html b/15.XGBoost/index.html index 62c3e85..001c25d 100644 --- a/15.XGBoost/index.html +++ b/15.XGBoost/index.html @@ -91,7 +91,7 @@
- +
@@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
- +
diff --git a/16.Stacking/index.html b/16.Stacking/index.html index 460b6a8..a34f2cc 100644 --- a/16.Stacking/index.html +++ b/16.Stacking/index.html @@ -91,7 +91,7 @@
- +
@@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
- +
diff --git a/17.LightGBM/index.html b/17.LightGBM/index.html index a3fc72f..bc4da9c 100644 --- a/17.LightGBM/index.html +++ b/17.LightGBM/index.html @@ -91,7 +91,7 @@
- +
@@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
- +
diff --git a/18.CatBoost/index.html b/18.CatBoost/index.html index b57ad9c..7a31cac 100644 --- a/18.CatBoost/index.html +++ b/18.CatBoost/index.html @@ -91,7 +91,7 @@
- +
@@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
- +
diff --git a/19.AutoML/index.html b/19.AutoML/index.html index 590114f..46e451f 100644 --- a/19.AutoML/index.html +++ b/19.AutoML/index.html @@ -91,7 +91,7 @@
- +
@@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
- +
diff --git "a/2.\345\277\253\344\276\206\346\216\242\347\264\242AI\347\232\204\344\270\226\347\225\214/index.html" "b/2.\345\277\253\344\276\206\346\216\242\347\264\242AI\347\232\204\344\270\226\347\225\214/index.html" index 3ce8c77..299eb6d 100644 --- "a/2.\345\277\253\344\276\206\346\216\242\347\264\242AI\347\232\204\344\270\226\347\225\214/index.html" +++ "b/2.\345\277\253\344\276\206\346\216\242\347\264\242AI\347\232\204\344\270\226\347\225\214/index.html" @@ -91,7 +91,7 @@
- +
@@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
- +
diff --git a/20.Auto-Sklearn/index.html b/20.Auto-Sklearn/index.html index 7c25717..2df6431 100644 --- a/20.Auto-Sklearn/index.html +++ b/20.Auto-Sklearn/index.html @@ -91,7 +91,7 @@
- +
@@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
- +
diff --git a/21.Optuna/index.html b/21.Optuna/index.html index 3da4e2f..664a105 100644 --- a/21.Optuna/index.html +++ b/21.Optuna/index.html @@ -91,7 +91,7 @@
- +
@@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
- +
diff --git a/22.Plotly-Express/index.html b/22.Plotly-Express/index.html index c9ac270..487eca4 100644 --- a/22.Plotly-Express/index.html +++ b/22.Plotly-Express/index.html @@ -91,7 +91,7 @@
- +
@@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
- +
diff --git "a/23.\350\263\207\346\226\231\345\210\206\345\270\203\350\210\207\351\233\242\347\276\244\345\200\274\350\231\225\347\220\206/index.html" "b/23.\350\263\207\346\226\231\345\210\206\345\270\203\350\210\207\351\233\242\347\276\244\345\200\274\350\231\225\347\220\206/index.html" index 3edafeb..c71f652 100644 --- "a/23.\350\263\207\346\226\231\345\210\206\345\270\203\350\210\207\351\233\242\347\276\244\345\200\274\350\231\225\347\220\206/index.html" +++ "b/23.\350\263\207\346\226\231\345\210\206\345\270\203\350\210\207\351\233\242\347\276\244\345\200\274\350\231\225\347\220\206/index.html" @@ -91,7 +91,7 @@
- +
@@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
- +
diff --git "a/24.\344\270\215\350\203\275\345\277\275\350\246\226\347\232\204\351\201\216\346\223\254\345\220\210\350\210\207\346\254\240\346\223\254\345\220\210/index.html" "b/24.\344\270\215\350\203\275\345\277\275\350\246\226\347\232\204\351\201\216\346\223\254\345\220\210\350\210\207\346\254\240\346\223\254\345\220\210/index.html" index b8be886..3cb25ee 100644 --- "a/24.\344\270\215\350\203\275\345\277\275\350\246\226\347\232\204\351\201\216\346\223\254\345\220\210\350\210\207\346\254\240\346\223\254\345\220\210/index.html" +++ "b/24.\344\270\215\350\203\275\345\277\275\350\246\226\347\232\204\351\201\216\346\223\254\345\220\210\350\210\207\346\254\240\346\223\254\345\220\210/index.html" @@ -91,7 +91,7 @@
- +
@@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
- +
@@ -377,13 +377,15 @@

如何選擇最佳的模型?

接著我們拿測試資料進行模型預測,可以發現由於紅色虛線的模型已經完整記住了訓練集的趨勢,因此在新的沒看過的資料表現就沒有那麼好了。尤其是在兩類別分隔線附近的資料最能看出端倪。於是我們可以很確定紅色虛線的模型已經過度擬合訓練資料了。另外紅色實線的模型雖然在訓練集中有幾筆會預測錯誤,但是它在測試集資料中一樣保持穩定的預測能力。

從上述的例子我們可以得知,在訓練模型時並非訓練集的誤差越小越好。我們必須同時拿測試集驗證模型的預測能力,目標是訓練集與測試集的平均誤差要越近越好。

-

一個適當的機器學習工作流程包括: -- 切割訓練集與測試集 -- 資料視覺化與前處理 -- 尋找適合的模型 -- 調整模型超參數 -- 使用適當的指標評估模型 -- 交叉驗證模型

+

一個適當的機器學習工作流程包括:

+
    +
  • 切割訓練集與測試集
  • +
  • 資料視覺化與前處理
  • +
  • 尋找適合的模型
  • +
  • 調整模型超參數
  • +
  • 使用適當的指標評估模型
  • +
  • 交叉驗證模型
  • +

Overfitting vs. Underfitting

過度擬合的反義就是欠擬合,從字面上可以得知模型預測能力是不好的。當模型太簡單時會發生欠擬合,或是加入太多的 L1/L2 正則化限制模型預測能力,使模型在從數據集中學習時變得不靈活。一個過於簡單的模型在預測中往往具有較小的方差(variance)而導致偏差(bias)就會變大。相反的過於複雜的模型會有較大的變異進而導致方差大,同時偏差會變小。偏差和方差都是機器學習中預測誤差的來源。在一般情況下我們可以減少偏差所引起的誤差,但可能會導致增加方差引起的誤差,反之亦然。

diff --git "a/25.\344\272\244\345\217\211\351\251\227\350\255\211 Cross-Validation \347\260\241\344\273\213/index.html" "b/25.\344\272\244\345\217\211\351\251\227\350\255\211 Cross-Validation \347\260\241\344\273\213/index.html" index 907f8a3..978cc8c 100644 --- "a/25.\344\272\244\345\217\211\351\251\227\350\255\211 Cross-Validation \347\260\241\344\273\213/index.html" +++ "b/25.\344\272\244\345\217\211\351\251\227\350\255\211 Cross-Validation \347\260\241\344\273\213/index.html" @@ -91,7 +91,7 @@
- +
@@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
- +
@@ -465,36 +465,69 @@

Holdout Method

此方法是最經典且最簡單實作的交叉驗證法,Holdout 顧名思義就是將資料切出一部分作為模型評估的依據。在這種方法中,我們將資料隨機分為三部分:訓練集、驗證集和測試集。其中只有訓練集資料實際參與訓練,其餘的資料僅拿來評估模型好壞。驗證集使用時機是在訓練過程中可以檢視訓練的趨勢,若有發現過擬合跡象可以提早發現並解決。以及方便我們進行調整超參數以及選擇最佳的模型。當然僅透過驗證集不能代表全部,因此最後確定好模型時,我們會再拿事先切好的測試集進行最終的評估,檢視模型的泛化能力。

參考

-

優點: -1. 簡單實作。 -2. 驗證集可以被拿來評估模型在訓練過程中的學習成果。 -3. 測試集可以評估模型泛化能力。

-

缺點: -1. 當資料集變異量較大時,驗證集與測試集可能無法足以評估模型。 -2. 不適合用在資料不平衡的資料集。

+

優點:

+
    +
  1. 簡單實作。
  2. +
  3. 驗證集可以被拿來評估模型在訓練過程中的學習成果。
  4. +
  5. 測試集可以評估模型泛化能力。
  6. +
+

缺點:

+
    +
  1. 當資料集變異量較大時,驗證集與測試集可能無法足以評估模型。
  2. +
  3. 不適合用在資料不平衡的資料集。
  4. +

K-fold Cross-Validation

上一個方法雖然簡單,但是在訓練過程中僅切一份驗證集往往不能夠代表全部。因此我們可以透過一些技巧切割驗證集,使得訓練過程中有一個更公正的評估方式。我們可以透過 K-Fold 方法將訓練資料再依序切割訓練集與測試集,K-Fold 裡面的測試集可以當成驗證集。K-Fold 的方法中 K 是由我們自由調控的,在每次的迭代中會選擇一組作為驗證集,其餘 (k-1) 組作為訓練集。透過這種方式學習,不同分組訓練的結果進行平均來減少方差,因此模型的性能對數據的劃分就不會那麼敏感。

參考

-

優點: -1. 降低模型訓練對於資料集的偏差。 -2. 訓練集與驗證集完整被充分利用與學習。

-

缺點: -1. 不適合用於資料不平衡的資料集。 -2. 如果要簡單的 K-fold 來尋找超參數會有資料洩漏問題導致訓練結果有偏差,因為在每個 Fold 中都會使用同一組資料進行驗證。 -3. 在相同的驗證集計算模型的誤差,當找到了最佳的超參數。這可能會導致重大偏差,有過擬合擬合疑慮。

+

優點:

+
    +
  1. 降低模型訓練對於資料集的偏差。
  2. +
  3. 訓練集與驗證集完整被充分利用與學習。
  4. +
+

缺點:

+
    +
  1. 不適合用於資料不平衡的資料集。
  2. +
  3. 如果要用簡單的 K-fold 來尋找超參數,會有資料洩漏問題導致訓練結果有偏差,因為在每個 Fold 中都會使用同一組資料進行驗證。
  4. +
  5. 在相同的驗證集計算模型的誤差,當找到了最佳的超參數。這可能會導致重大偏差,有過擬合疑慮。
  6. +

Leave One Out

此方法是 K-fold 其中一種特例,當 K 等於資料集的數量時就等於 Leave One Out 方法。也就是在每次訓練時僅會把一筆資料當成測試資料,其餘的 N-1 筆資料作為訓練模型的資料。此作法相當簡單明瞭,但是訓練負擔會非常重且耗時。然而 Leave p-out 是另一種技巧,其中的 p 使用者可以自己設定每次訓練需要留幾筆資料作為測試集。

-

優點: -1. 簡單且容易理解,好實作。

-

缺點: -1. 需要花費更多的訓練時間。

+

優點:

+
    +
  1. 簡單且容易理解,好實作。
  2. +
+

缺點:

+
    +
  1. 需要花費更多的訓練時間。
  2. +

Random Subsampling

+

Random Subsampling 方法是一種簡單且常用的交叉驗證技術,它透過多次隨機抽樣將資料集切割成訓練集與測試集。每次隨機分割時,測試集的比例固定,而訓練集和測試集則隨機選取。這種方法主要透過多次隨機測試不同的資料切分方式來評估模型性能,最後取測試結果的平均值。

+

優點:

+
    +
  1. 多次隨機抽樣能減少資料劃分的偏差。
  2. +
  3. 可以靈活選擇訓練和測試集的比例。
  4. +
+

缺點:

+
    +
  1. 多次重複抽樣需要較大的計算資源。
  2. +
  3. 每次隨機抽樣的資料集可能會有所不同,結果不穩定。
  4. +

Bootstrapping

還有一種比較特殊的交叉驗證方式,Bootstrapping 自助抽樣法。是一種從給定訓練集中有放回的均勻抽樣,也就是說,每當選中一個樣本,它等可能地被再次選中並被再次添加到訓練集中。假設每次訓練都採樣十個樣本,在這十筆資料中很有可能會再次被隨機抽到。剩下沒有抽到的資料則都變成測試集,用來評估訓練完的模型。

+

優點:

+
    +
  1. 能在小數據集的情況下提高模型穩定性。
  2. +
  3. 可重複利用相同的數據來進行多次訓練。
  4. +
+

缺點:

+
    +
  1. 重複樣本可能導致模型過擬合。
  2. +
  3. 測試集中資料量較少,可能導致模型泛化能力評估不夠充分。
  4. +

小結

交叉驗證是訓練模型中非常重要的技巧,尤其是當手邊的資料集有限時更應該使用。透過交叉驗證技巧,即使在數據有限的情況下,我們也能夠獲得準確的結果,並且可以避免模型過度擬合。並為我們提供更準確的模型預測性能估計方式,同時也能夠提升模型的泛化能力。以上的方法可以直接使用 scikit-learn 裡面 model_selection 底下的 cross_val_score 方法進行實作。

diff --git "a/26.\344\272\244\345\217\211\351\251\227\350\255\211 K-Fold Cross-Validation/image/img26-4.png" "b/26.\344\272\244\345\217\211\351\251\227\350\255\211 K-Fold Cross-Validation/image/img26-4.png" index 7ebe038..0fb6b77 100644 Binary files "a/26.\344\272\244\345\217\211\351\251\227\350\255\211 K-Fold Cross-Validation/image/img26-4.png" and "b/26.\344\272\244\345\217\211\351\251\227\350\255\211 K-Fold Cross-Validation/image/img26-4.png" differ diff --git "a/26.\344\272\244\345\217\211\351\251\227\350\255\211 K-Fold Cross-Validation/index.html" "b/26.\344\272\244\345\217\211\351\251\227\350\255\211 K-Fold Cross-Validation/index.html" index d745763..973179d 100644 --- "a/26.\344\272\244\345\217\211\351\251\227\350\255\211 K-Fold Cross-Validation/index.html" +++ "b/26.\344\272\244\345\217\211\351\251\227\350\255\211 K-Fold Cross-Validation/index.html" @@ -91,7 +91,7 @@
- +
@@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
- +
@@ -315,8 +315,8 @@
  • - - K-Fold Cross-Validation + + K-Fold Cross Validation
  • @@ -325,18 +325,18 @@
  • - - Repeated K-Fold + + Repeated K-Fold Cross Validation
  • - - Stratified K-Fold + + Stratified K-Fold Cross Validation
  • - - Group K-Fold + + Group K-Fold Cross Validation
  • @@ -387,8 +387,8 @@
  • - - K-Fold Cross-Validation + + K-Fold Cross Validation
  • @@ -397,18 +397,18 @@
  • - - Repeated K-Fold + + Repeated K-Fold Cross Validation
  • - - Stratified K-Fold + + Stratified K-Fold Cross Validation
  • - - Group K-Fold + + Group K-Fold Cross Validation
  • @@ -422,17 +422,17 @@

    [Day 26] 交叉驗證 K-Fold Cross-Valid

    今日學習目標

    • 了解 K-Fold 各種不同變形
        -
      • K-Fold Cross-Validation
      • +
      • K-Fold Cross Validation
      • Nested K-Fold Cross Validation
      • -
      • Repeated K-Fold
      • -
      • Stratified K-Fold
      • -
      • Group K-Fold
      • +
      • Repeated K-Fold Cross Validation
      • +
      • Stratified K-Fold Cross Validation
      • +
      • Group K-Fold Cross Validation

    前言

    交叉驗證又稱為樣本外測試,是資料科學中重要的一環。透過資料間的重複採樣過程,用於評估機器學習模型並驗證模型對獨立測試數據集的泛化能力。在今天的文章中我們將詳細的來介紹每一種 K-Fold 變型。

    -

    K-Fold Cross-Validation

    +

    K-Fold Cross Validation

    在 K-Fold 的方法中我們會將資料切分為 K 等份,K 是由我們自由調控的,以下圖為例:假設我們設定 K=10,也就是將訓練集切割為十等份。這意味著相同的模型要訓練十次,每一次的訓練都會從這十等份挑選其中九等份作為訓練資料,剩下一等份未參與訓練並作為驗證集。因此訓練十回將會有十個不同驗證集的 Error,這個 Error 通常我們會稱作 loss 也就是模型評估方式。模型評估方式有很多種,以回歸問題來說就有 MSE、MAE、RMSE...等。最終把這十次的 loss 加總起來取平均就可以當成最終結果。透過這種方式,不同分組訓練的結果進行平均來減少方差,因此模型的性能對數據的劃分就不會那麼敏感。

    參考

    @@ -446,26 +446,32 @@

    Nested K-Fold Cross Validation

    -

    Repeated K-Fold

    +

    Repeated K-Fold Cross Validation

    另一個 K-Fold 變型為 Repeated K-Fold 顧名思義就是重複 n 次 K-Fold cross-validation。假設 K=2、n=2 代表 2-fold cross validation,在每一回合又會將資料打亂得到新組合。因此最終會得到 4 組的資料,意味著模型將訓練四遍。此種方法會確保每次組合的隨機資料並不會重複。簡單來說執行 K-Fold 交叉驗證,然後重新洗牌數據,然後再次執行 K-Fold。

    -

    Stratified K-Fold

    +

    Stratified K-Fold Cross Validation

    分層交叉驗證,每個 Fold 都是按照類別的比例抽出來的。假設這個分類任務一共有三個類別A、B、C,它們的比例是1:4:8。那麼每個fold中的A、B、C的比例也必須是1:4:8。其實現方式也非常簡單,首先依序把A、B、C類別的數據隨機分成k組,最後再依照比例把它們合併起來,就得到了k組滿足1:4:8的數據了。

    -

    優點: -1. 優於一般的 K-Fold 因為test set能充分代表整體數據。 -2. 預測結果的方差也會變小,使得交叉驗證的 error 更可靠。 -3. 對於資料不平衡的數據很有用

    -

    缺點: -1. 大多實例都以分類問題為主

    - -

    Group K-Fold

    + +

    Group K-Fold Cross Validation

    此做法為了避免取連續的資料而造成測試集或驗證集偏向某一特別的狀況而造成過度擬合訓練集,反而在未看過的資料下表現不好。Group K-Fold 為了避免此情況發生,它切割資料時有效的從資料集中每個區塊隨機挑選作為驗證集。同時保證每一個 Fold 的驗證集並不會有重複的資料。假設你有三個類別,至少驗證集必須從三個不同的分組中抽樣取出,同時確保每一個 Fold 所抽出來的這三個分組並不會重複。

      diff --git "a/27.\346\251\237\345\231\250\345\255\270\347\277\222\345\270\270\347\212\257\351\214\257\347\232\204\345\215\201\344\273\266\344\272\213/index.html" "b/27.\346\251\237\345\231\250\345\255\270\347\277\222\345\270\270\347\212\257\351\214\257\347\232\204\345\215\201\344\273\266\344\272\213/index.html" index 68ee448..3037a74 100644 --- "a/27.\346\251\237\345\231\250\345\255\270\347\277\222\345\270\270\347\212\257\351\214\257\347\232\204\345\215\201\344\273\266\344\272\213/index.html" +++ "b/27.\346\251\237\345\231\250\345\255\270\347\277\222\345\270\270\347\212\257\351\214\257\347\232\204\345\215\201\344\273\266\344\272\213/index.html" @@ -91,7 +91,7 @@
    - +
    @@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
    - +
    @@ -505,7 +505,8 @@

    1. 資料收集與處理不當

    2. 訓練集與測試集的類別分佈不一致

    在分類的資料中,初學者常見的錯誤是忘記使用分層抽樣 (stratify) 來對訓練集和測試集進行切割。當測試集的分佈盡可能與訓練集相同的情況下,模型才更有可能得到更準確的預測。然而在分類的問題中,我們必須更關注每個類別的資料分佈比例。以下舉個例子:假設我們有三個標籤的類別,而這三個類別的分佈比例分別為 4:3:3。同理我們在進行資料切割的時候必須確保訓練集與測試集需要有相同的資料分佈比例。

    大家應該都使用過 Sklearn 的 train_test_split 進行資料切割。在此方法中 Sklearn 提供了一個 stratify 參數達到分層隨機抽樣的目的。特別是在原始數據中樣本標籤分佈不均衡時非常有用,一些分類問題可能會在目標類的分佈中表現出很大的不平衡:例如,負樣本與正樣本比例懸殊(信用卡盜刷預測、離職員工預測)。以下用紅酒分類預測來進行示範,首先我們不使用 stratify 隨機切割資料並查看資料切割前後的三種類別比例。

    -
    from sklearn.datasets import load_wine
    +
    import pandas as pd
    +from sklearn.datasets import load_wine
     from sklearn.model_selection import train_test_split
     
     X, y = load_wine(return_X_y=True)
    @@ -607,7 +608,7 @@ 

    5. 資料處理不當導致資料洩漏

    更強大的解決方案是使用 Sklearn 內建的 pipeline,它能夠保護模型免於資料洩漏的問題。此方法能夠確保訓練資料僅參與轉換擬合與模型訓練,而測試資料僅用於計算並驗證模型。


    6. 僅使用測試集評估模型好壞

    -

    如果你的測試資料 R2 score 得到了 0.85 就代表很好了嗎?不盡然!儘管有高的測試分數通常意味著模型表現佳,但在解釋測試結果時仍有一些重要的注意事項。首先最重要的,無論分數值如何測試集的分數一定要與訓練集相比較才能確保模型訓練好與壞。當你的模型訓練集分數高於測試集的分數,並且兩者都足夠高以滿足專案的目標期望時這代表你訓練了一個好模型。然而這並不意味著訓練和測試分數之間的差異越大越好。舉個例子,若訓練集的 R2 score 為 0.85 測試集為 0.8 即代表模型既不過度擬合(overfit)也不欠擬合(underfit)。但是如果訓練集 0.9 測試集 0.8 的時候,你的模型就是過擬合。其原因是該模型沒有在訓練期間進行泛化,而是記住了一些訓練數據,從而導致測試分數低得多。

    +

    如果你的測試資料 R2 score 得到了 0.85 就代表很好了嗎?不盡然!儘管有高的測試分數通常意味著模型表現佳,但在解釋測試結果時仍有一些重要的注意事項。首先最重要的,無論分數值如何測試集的分數一定要與訓練集相比較才能確保模型訓練好與壞。當你的模型訓練集分數高於測試集的分數,並且兩者都足夠高以滿足專案的目標期望時這代表你訓練了一個好模型。然而這並不意味著訓練和測試分數之間的差異越大越好。舉個例子,若訓練集的 R2 score 為 0.85 測試集為 0.8 即代表模型既不過度擬合(overfit)也不欠擬合(underfit)。但是如果訓練集 0.9 測試集 0.7 的時候,你的模型就是過擬合。其原因是該模型沒有在訓練期間進行泛化,而是記住了一些訓練數據,從而導致測試分數低得多。

    在大多數任務中你將會看到許多人使用 tree-based 模型或是整體學習模型 (ensemble models)。例如在隨機森林演算法當中如果它們的樹深度太深,往往會獲得非常高的訓練分數,從而導致過度擬合。另外也有測試集的分數比訓練集高的情況,若發生此情況時通常都會感覺是不是做錯了什麼。這種情況的主要原因是資料洩漏,也就是上一節我們討論的情況。或是你的測試資料筆數太少,不足以驗證模型好壞。

    另外有時候我們也會得到在訓練集有很好的表現但測試集無敵差的情況。當訓練和測試分數差異很大時,問題往往與測試集有關而不是過度擬合。這時候你可能要檢查資料預處理的方式是否一致 (像是取 log 或 scale),或是只是忘記對測試集做轉換處理。

    這裡做一個小結,總之在訓練好模型時請仔細檢查訓練和測試分數之間的差距。並且可以透過此評估方式檢視模型是否過擬合,同時也能進行模型調參或是選擇最佳的資料預處理方式。並為最終的模型做最佳的準備。

    diff --git "a/28.\345\204\262\345\255\230\350\250\223\347\267\264\345\245\275\347\232\204\346\250\241\345\236\213/index.html" "b/28.\345\204\262\345\255\230\350\250\223\347\267\264\345\245\275\347\232\204\346\250\241\345\236\213/index.html" index 5441b14..0f56189 100644 --- "a/28.\345\204\262\345\255\230\350\250\223\347\267\264\345\245\275\347\232\204\346\250\241\345\236\213/index.html" +++ "b/28.\345\204\262\345\255\230\350\250\223\347\267\264\345\245\275\347\232\204\346\250\241\345\236\213/index.html" @@ -91,7 +91,7 @@
    - +
    @@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
    - +
    diff --git "a/29.\344\275\277\347\224\250Python-Flask\346\236\266\350\250\255API\345\220\247/index.html" "b/29.\344\275\277\347\224\250Python-Flask\346\236\266\350\250\255API\345\220\247/index.html" index aa7652e..5f1c795 100644 --- "a/29.\344\275\277\347\224\250Python-Flask\346\236\266\350\250\255API\345\220\247/index.html" +++ "b/29.\344\275\277\347\224\250Python-Flask\346\236\266\350\250\255API\345\220\247/index.html" @@ -91,7 +91,7 @@
    - +
    @@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
    - +
    diff --git "a/3.\344\275\240\347\234\237\344\272\206\350\247\243\350\263\207\346\226\231\345\227\216\350\251\246\350\251\246\347\234\213\350\246\226\350\246\272\345\214\226\345\210\206\346\236\220\345\220\247/index.html" "b/3.\344\275\240\347\234\237\344\272\206\350\247\243\350\263\207\346\226\231\345\227\216\350\251\246\350\251\246\347\234\213\350\246\226\350\246\272\345\214\226\345\210\206\346\236\220\345\220\247/index.html" index 4c92936..fda8dad 100644 --- "a/3.\344\275\240\347\234\237\344\272\206\350\247\243\350\263\207\346\226\231\345\227\216\350\251\246\350\251\246\347\234\213\350\246\226\350\246\272\345\214\226\345\210\206\346\236\220\345\220\247/index.html" +++ "b/3.\344\275\240\347\234\237\344\272\206\350\247\243\350\263\207\346\226\231\345\227\216\350\251\246\350\251\246\347\234\213\350\246\226\350\246\272\345\214\226\345\210\206\346\236\220\345\220\247/index.html" @@ -91,7 +91,7 @@
    - +
    @@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
    - +
    diff --git "a/30.\344\275\277\347\224\250 Heroku \351\203\250\347\275\262\346\251\237\345\231\250\345\255\270\347\277\222 API/index.html" "b/30.\344\275\277\347\224\250 Heroku \351\203\250\347\275\262\346\251\237\345\231\250\345\255\270\347\277\222 API/index.html" index d4a21aa..39a98c3 100644 --- "a/30.\344\275\277\347\224\250 Heroku \351\203\250\347\275\262\346\251\237\345\231\250\345\255\270\347\277\222 API/index.html" +++ "b/30.\344\275\277\347\224\250 Heroku \351\203\250\347\275\262\346\251\237\345\231\250\345\255\270\347\277\222 API/index.html" @@ -91,7 +91,7 @@
    - +
    @@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
    - +
    diff --git "a/4.\345\222\261\345\200\221\344\270\200\350\265\267\345\201\232\350\263\207\346\226\231\346\270\205\347\220\206\345\222\214\345\211\215\350\231\225\347\220\206/index.html" "b/4.\345\222\261\345\200\221\344\270\200\350\265\267\345\201\232\350\263\207\346\226\231\346\270\205\347\220\206\345\222\214\345\211\215\350\231\225\347\220\206/index.html" index 99420f0..7dff3e5 100644 --- "a/4.\345\222\261\345\200\221\344\270\200\350\265\267\345\201\232\350\263\207\346\226\231\346\270\205\347\220\206\345\222\214\345\211\215\350\231\225\347\220\206/index.html" +++ "b/4.\345\222\261\345\200\221\344\270\200\350\265\267\345\201\232\350\263\207\346\226\231\346\270\205\347\220\206\345\222\214\345\211\215\350\231\225\347\220\206/index.html" @@ -91,7 +91,7 @@
    - +
    @@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
    - +
    diff --git a/404.html b/404.html index 1e474d1..02a2bcd 100644 --- a/404.html +++ b/404.html @@ -153,7 +153,7 @@ - +
    @@ -200,7 +200,7 @@ - +
    diff --git "a/5.\346\251\237\345\231\250\345\255\270\347\277\222\345\244\247\350\243\234\345\270\226/index.html" "b/5.\346\251\237\345\231\250\345\255\270\347\277\222\345\244\247\350\243\234\345\270\226/index.html" index d10a301..dadd791 100644 --- "a/5.\346\251\237\345\231\250\345\255\270\347\277\222\345\244\247\350\243\234\345\270\226/index.html" +++ "b/5.\346\251\237\345\231\250\345\255\270\347\277\222\345\244\247\350\243\234\345\270\226/index.html" @@ -91,7 +91,7 @@
    - +
    @@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
    - +
    diff --git "a/6.\351\235\236\347\233\243\347\235\243\345\274\217\345\255\270\347\277\222k-means\345\210\206\347\276\244/index.html" "b/6.\351\235\236\347\233\243\347\235\243\345\274\217\345\255\270\347\277\222k-means\345\210\206\347\276\244/index.html" index bacfe6b..c66eef9 100644 --- "a/6.\351\235\236\347\233\243\347\235\243\345\274\217\345\255\270\347\277\222k-means\345\210\206\347\276\244/index.html" +++ "b/6.\351\235\236\347\233\243\347\235\243\345\274\217\345\255\270\347\277\222k-means\345\210\206\347\276\244/index.html" @@ -91,7 +91,7 @@
    - +
    @@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
    - +
    diff --git "a/7.\351\235\236\347\233\243\347\235\243\345\274\217\345\255\270\347\277\222-\351\231\215\347\266\255/index.html" "b/7.\351\235\236\347\233\243\347\235\243\345\274\217\345\255\270\347\277\222-\351\231\215\347\266\255/index.html" index 296c045..85371ca 100644 --- "a/7.\351\235\236\347\233\243\347\235\243\345\274\217\345\255\270\347\277\222-\351\231\215\347\266\255/index.html" +++ "b/7.\351\235\236\347\233\243\347\235\243\345\274\217\345\255\270\347\277\222-\351\231\215\347\266\255/index.html" @@ -91,7 +91,7 @@
    - +
    @@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
    - +
    diff --git "a/8.\347\267\232\346\200\247\350\277\264\346\255\270/index.html" "b/8.\347\267\232\346\200\247\350\277\264\346\255\270/index.html" index eae1d7a..2863f10 100644 --- "a/8.\347\267\232\346\200\247\350\277\264\346\255\270/index.html" +++ "b/8.\347\267\232\346\200\247\350\277\264\346\255\270/index.html" @@ -91,7 +91,7 @@
    - +
    @@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
    - +
    diff --git "a/9.\351\202\217\350\274\257\350\277\264\346\255\270/index.html" "b/9.\351\202\217\350\274\257\350\277\264\346\255\270/index.html" index 3a8b391..5aede62 100644 --- "a/9.\351\202\217\350\274\257\350\277\264\346\255\270/index.html" +++ "b/9.\351\202\217\350\274\257\350\277\264\346\255\270/index.html" @@ -91,7 +91,7 @@
    - +
    @@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
    - +
    diff --git a/google0a1389a852a5fccd.html b/google0a1389a852a5fccd.html new file mode 100644 index 0000000..26085bb --- /dev/null +++ b/google0a1389a852a5fccd.html @@ -0,0 +1 @@ +google-site-verification: google0a1389a852a5fccd.html \ No newline at end of file diff --git a/index.html b/index.html index cff3c94..8b895a3 100644 --- a/index.html +++ b/index.html @@ -91,7 +91,7 @@
    - +
    @@ -120,7 +120,7 @@ 全民瘋AI系列 [經典機器學習]
    - +
    diff --git "a/pdf/\345\205\250\346\260\221\347\230\213AI\347\263\273\345\210\227_\347\266\223\345\205\270\346\251\237\345\231\250\345\255\270\347\277\222_v2.1.pdf" "b/pdf/\345\205\250\346\260\221\347\230\213AI\347\263\273\345\210\227_\347\266\223\345\205\270\346\251\237\345\231\250\345\255\270\347\277\222_v2.1.pdf" index eac1c2c..5d98115 100644 Binary files "a/pdf/\345\205\250\346\260\221\347\230\213AI\347\263\273\345\210\227_\347\266\223\345\205\270\346\251\237\345\231\250\345\255\270\347\277\222_v2.1.pdf" and "b/pdf/\345\205\250\346\260\221\347\230\213AI\347\263\273\345\210\227_\347\266\223\345\205\270\346\251\237\345\231\250\345\255\270\347\277\222_v2.1.pdf" differ diff --git a/search/search_index.json b/search/search_index.json index 921ff6b..d077ecb 100644 --- a/search/search_index.json +++ b/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"\u5168\u6c11\u760bAI\u7cfb\u52172.0 \u7b2c13\u5c46iT\u90a6\u5e6b\u5fd9\u9435\u4eba\u8cfd \u524d\u8a00 \u54c8\u56c9\u5927\u5bb6\u597d\u6211\u662f10\u7a0b\u5f0f\u4e2d\u768410\uff01\u6211\u662f \u4e0a\u4e00\u5c46 \u9435\u4eba\u8cfd\u5f71\u7247\u6559\u5b78\u7d44 \u5168\u6c11\u760bAI\u7cfb\u5217 
\u7684\u4f5c\u8005\uff0c\u7576\u6642\u8b1b\u89e3\u4e86\u4eba\u5de5\u667a\u6167\u7684\u57fa\u790e\u4ee5\u53ca\u5e38\u898b\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u8207\u624b\u628a\u624b\u6559\u5b78\u3002\u7531\u65bc\u5927\u5bb6\u53cd\u61c9\u5f88\u71b1\u70c8\uff0c\u8b93\u6211\u770b\u5230\u4e86\u5927\u5bb6\u5c0d\u65bcAI\u7684\u5b78\u7fd2\u71b1\u5ff1\u3002\u4e5f\u56e0\u70ba\u4e0a\u4e00\u5c46\u7372\u5f97\u4e86\u5f71\u7247\u6559\u5b78\u7d44\u512a\u9078\uff0c\u6536\u5230\u4e86\u8a31\u591a\u66f8\u5546\u7684\u51fa\u7248\u9080\u8acb\uff0c\u7531\u65bc\u6211\u6c92\u6709\u6642\u9593\u8207\u52d5\u529b\u5c07\u9019\u4e9b\u5927\u91cf\u77e5\u8b58\u5beb\u6210\u6587\u7ae0\u56e0\u6b64\u90fd\u5a49\u62d2\u4e86\u3002\u56e0\u6b64\u6211\u60f3\u85c9\u7531\u9019\u4e00\u6b21\u9435\u4eba\u8cfd\u5c07\u4e0a\u4e00\u5c46\u7684\u5f71\u7247\u5167\u5bb9\u6574\u7406\u6210\u96fb\u5b50\u66f8\u7248\u672c\uff0c\u63d0\u4f9b\u5927\u5bb6\u5f71\u7247\u6559\u5b78\u8207\u6587\u5b57\u7248\u7684\u7b46\u8a18\u5167\u5bb9(\u5537\u547c\u66f8\u5546\u5feb\u770b\u904e\u4f86\uff5e)\u7576\u7136\u5167\u5bb9\u6703\u4ee5\u4e4b\u524d\u5f71\u7247\u6559\u5b78\u70ba\u57fa\u5e95\uff0c\u4e26\u52a0\u5165\u4e00\u4e9b\u65b0\u7684\u5143\u7d20\u8b93\u6587\u7ae0\u5167\u5bb9\u8b8a\u5f97\u66f4\u7d2e\u5be6\u3002\u5728\u5168\u65b0\u7684 \u5168\u6c11\u760bAI\u7cfb\u52172.0 \u4e2d\u6211\u6703\u4ecb\u7d39\u5be6\u7528\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4e26\u542b\u6709\u7a0b\u5f0f\u624b\u628a\u624b\u5be6\u4f5c\uff0c\u4ee5\u53ca\u8fd1\u5e74\u4f86\u71b1\u9580\u7684\u6a5f\u5668\u5b78\u7fd2\u5957\u4ef6\u8207\u6a21\u578b\u8abf\u53c3\u6280\u5de7\u3002\u9664\u6b64\u4e4b\u5916\u6211\u9084\u6703\u63d0\u5230\u5927\u5bb6\u6700\u611f\u8208\u8da3\u7684 AI \u6a21\u578b\u843d\u5730\u8207\u6574\u5408\u3002\u5e0c\u671b\u5728\u9019\u6b21\u7684\u9435\u4eba\u8cfd\u80fd\u5920\u5c07AI\u7684\u8cc7\u6e90\u6574\u7406\u5f97\u66f4\u8a73\u7d30\u4e26\u5206\u4eab\u7d66\u5404\u4f4d\u3002 \u6b64\u7cfb\u5217\u6559\u5b78\u9069\u5408\u8ab0? 
\u5982\u679c\u60a8\u662f\u4e4b\u524d\u7684\u820a\u8b80\u8005\uff0c\u6b61\u8fce\u56de\u4f86\u70ba\u81ea\u5df1\u5145\u96fb\uff5e\u65b0\u7684\u7cfb\u5217\u6587\u7ae0\u4fdd\u8b49\u8b93\u4f60\u6536\u7a6b\u6eff\u6eff\uff01\u82e5\u60a8\u662f\u65b0\u4f86\u7684\u8b80\u8005\u6b61\u8fce\u52a0\u5165\u4eba\u5de5\u667a\u6167\u7684\u4e16\u754c\uff0c\u6b64\u7cfb\u5217\u6587\u7ae0\u6b63\u9069\u5408\u521d\u5b78\u8005\u95b1\u8b80\u3002\u53e6\u5916\u5efa\u8b70\u53ef\u4ee5\u642d\u914d\u6211 \u4e0a\u4e00\u5c46 \u9435\u4eba\u8cfd\u7684\u5f71\u7247\u6559\u5b78\u9032\u884c\u5b78\u7fd2\u3002 \u7cfb\u5217\u6587\u7ae0\u5167\u5bb9\u898f\u5283 \u5728\u672c\u6b21\u9435\u4eba\u8cfd\u9810\u8a08\u65b0\u589e\u4e86\u8a31\u591a\u65b0\u5167\u5bb9\uff0c\u7279\u5225\u662f\u8fd1\u5e74\u4f86\u6bd4\u8f03\u65b0\u7684\u6f14\u7b97\u6cd5\u5957\u4ef6\uff0c\u4ee5\u53ca\u5728\u6a21\u578b\u8a13\u7df4\u4e2d\u5fc5\u9808\u6ce8\u610f\u7684\u5927\u5c0f\u4e8b\u3002\u672c\u7cfb\u5217\u8981\u5728\u77ed\u77ed30\u5929\u5167\u8b1b\u5b8c\u6240\u6709 AI \u9818\u57df\u76f8\u95dc\u61c9\u7528\u662f\u4e0d\u592a\u53ef\u80fd\u7684\u4e8b\u60c5\uff0c\u56e0\u6b64\u6211\u7684\u898f\u5283\u662f\u5f9e\u8a8d\u8b58\u4eba\u5de5\u667a\u6167\u958b\u59cb\u5207\u5165\u4e3b\u984c\u3002\u5148\u8b93\u5927\u5bb6\u77e5\u9053\u4f55\u8b02\u4eba\u5de5\u667a\u6167\u4ee5\u53ca\u76f8\u95dc\u61c9\u7528\u6709\u54ea\u4e9b\u3002\u63a5\u8457\u5e36\u5404\u4f4d\u4e86\u89e3\u6210\u70ba\u8cc7\u6599\u79d1\u5b78\u5bb6\u7684\u7b2c\u4e00\u6b65\uff0c\u5c31\u662f\u8cc7\u6599\u5206\u6790\u8207\u8996\u89ba\u5316\uff0c\u518d\u4f86\u6703\u6709\u4e00\u7cfb\u5217\u7d93\u5178\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4ecb\u7d39\u3002\u6700\u5f8c\u4e5f\u662f\u5927\u5bb6\u53ef\u80fd\u6703\u6709\u8208\u8da3\u7684\u6574\u5408\u90e8\u5206\uff0c\u6703\u4ee5\u5be6\u969b\u7684\u5e36\u5927\u5bb6\u624b\u628a\u624b\u90e8\u7f72\u6211\u5011\u7684AI\u6a21\u578b\u4ee5\u53ca\u524d\u5f8c\u7aef\u4e32\u63a5\u7684\u6982\u5ff5\u3002 \u524d\u7f6e\u4f5c\u696d\u8cc7\u6e90 
\u672c\u7cfb\u5217\u6559\u5b78\u5c07\u6709\u5927\u91cf\u7684\u7a0b\u5f0f\u5be6\u4f5c\uff0c\u4e26\u63a1\u7528 Google Colab \u505a\u70ba\u7a0b\u5f0f\u96f2\u7aef\u904b\u884c\u7684\u7de8\u8f2f\u57f7\u884c\u74b0\u5883\u3002\u5404\u4f4d\u53ef\u4ee5\u76f4\u63a5\u5229\u7528 Colab \u958b\u555f\u672c\u7cfb\u5217\u6587\u7ae0\u7684\u7bc4\u4f8b\u7a0b\u5f0f\u3002\u5728\u4f7f\u7528\u6b64\u5e73\u53f0\u4e4b\u524d\u6bcf\u500b\u4eba\u90fd\u5fc5\u9808\u8981\u6709\u81ea\u5df1\u7684 Google \u5e33\u865f\uff0c\u624d\u80fd\u9806\u5229\u7684\u958b\u555f\u4e26\u57f7\u884c\u7a0b\u5f0f\u78bc\u3002Colab \u53ef\u8b93\u4f60\u8f15\u9b06\u5730\u5728\u700f\u89bd\u5668\u4e0a\u64b0\u5beb\u4e26\u57f7\u884c Python \u7a0b\u5f0f\u8a9e\u8a00\uff0c\u5b83\u53ef\u4ee5\u8aaa\u662f\u6a5f\u5668\u5b78\u7fd2\u65b0\u624b\u7684\u5165\u9580\u5de5\u5177\u3002\u6b64\u5916 Colab \u5177\u5099\u4e86\u4ee5\u4e0b\u5e7e\u500b\u512a\u9ede\uff1a \u4e0d\u5fc5\u9032\u884c\u4efb\u4f55\u8a2d\u5b9a\u8207\u5b89\u88dd \u514d\u8cbb\u984d\u5ea6\u4f7f\u7528 GPU\u3001TPU \u8cc7\u6e90 \u8f15\u9b06\u5171\u7528\u8207\u5206\u4eab\u6a94\u6848 \u56e0\u6b64\u8b80\u8005\u5fc5\u9808\u5148\u719f\u6089 Colab \u7684\u64cd\u4f5c\u6a21\u5f0f\uff0c\u60f3\u4e86\u89e3\u8a72\u5982\u4f55\u64cd\u4f5c\u7684\u670b\u53cb\u5011\u53ef\u4ee5\u5148\u4f86\u770b\u9019\u4e00\u6b65 \u5f71\u7247 \u6559\u5b78\u3002\u6216\u662f\u53ef\u4ee5\u95b1\u8b80\u5176\u4ed6\u76f8\u95dc \u6587\u7ae0 \u3002 \u56de\u5831\u932f\u8aa4\u8207\u5efa\u8b70 \u672c\u7cfb\u5217\u6587\u7ae0\u82e5\u6709\u554f\u984c\u6216\u662f\u5167\u5bb9\u5efa\u8b70\u90fd\u53ef\u4ee5\u4f86 GitHub \u4e2d\u7684 issue \u63d0\u51fa\u3002\u6b61\u8fce\u5927\u5bb6\u4e00\u540c\u8ca2\u737b\u70ba\u9019\u7cfb\u5217\u6587\u7ae0\u6709\u66f4\u597d\u7684\u95b1\u8b80\u54c1\u8cea\u3002 \u95dc\u65bc\u4f5c\u8005 
\u66fe\u4efb\u8077\u65bc\u53f0\u7063\u4eba\u5de5\u667a\u6167\u5b78\u6821\uff0c\u64d4\u4efbAI\u5de5\u7a0b\u5e2b\uff0c\u64c1\u6709\u8c50\u5bcc\u7684\u6559\u5b78\u7d93\u9a57\uff0c\u71b1\u8877\u65bc\u7db2\u9801\u524d\u5f8c\u7aef\u6574\u5408\u8207AI\u6f14\u7b97\u6cd5\u7684\u958b\u767c\u3002\u5e0c\u671b\u85c9\u7531\u9435\u4eba\u8cfd\uff0c\u5c07\u6240\u5b78\u8ca2\u737b\u51fa\u4f86\uff0c\u70baAI\u9818\u57df\u63d0\u4f9b\u66f4\u591a\u8cc7\u6e90\u3002 @andy6804tw \u6b61\u8fce\u5927\u5bb6\u8a02\u95b1\u6211\u7684 YouTube \u983b\u9053\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u7c21\u5831 PDF & Code \u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u5168\u6c11\u760bAI\u7cfb\u52172.0"},{"location":"#ai20","text":"\u7b2c13\u5c46iT\u90a6\u5e6b\u5fd9\u9435\u4eba\u8cfd","title":"\u5168\u6c11\u760bAI\u7cfb\u52172.0"},{"location":"#_1","text":"\u54c8\u56c9\u5927\u5bb6\u597d\u6211\u662f10\u7a0b\u5f0f\u4e2d\u768410\uff01\u6211\u662f \u4e0a\u4e00\u5c46 \u9435\u4eba\u8cfd\u5f71\u7247\u6559\u5b78\u7d44 \u5168\u6c11\u760bAI\u7cfb\u5217 
\u7684\u4f5c\u8005\uff0c\u7576\u6642\u8b1b\u89e3\u4e86\u4eba\u5de5\u667a\u6167\u7684\u57fa\u790e\u4ee5\u53ca\u5e38\u898b\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u8207\u624b\u628a\u624b\u6559\u5b78\u3002\u7531\u65bc\u5927\u5bb6\u53cd\u61c9\u5f88\u71b1\u70c8\uff0c\u8b93\u6211\u770b\u5230\u4e86\u5927\u5bb6\u5c0d\u65bcAI\u7684\u5b78\u7fd2\u71b1\u5ff1\u3002\u4e5f\u56e0\u70ba\u4e0a\u4e00\u5c46\u7372\u5f97\u4e86\u5f71\u7247\u6559\u5b78\u7d44\u512a\u9078\uff0c\u6536\u5230\u4e86\u8a31\u591a\u66f8\u5546\u7684\u51fa\u7248\u9080\u8acb\uff0c\u7531\u65bc\u6211\u6c92\u6709\u6642\u9593\u8207\u52d5\u529b\u5c07\u9019\u4e9b\u5927\u91cf\u77e5\u8b58\u5beb\u6210\u6587\u7ae0\u56e0\u6b64\u90fd\u5a49\u62d2\u4e86\u3002\u56e0\u6b64\u6211\u60f3\u85c9\u7531\u9019\u4e00\u6b21\u9435\u4eba\u8cfd\u5c07\u4e0a\u4e00\u5c46\u7684\u5f71\u7247\u5167\u5bb9\u6574\u7406\u6210\u96fb\u5b50\u66f8\u7248\u672c\uff0c\u63d0\u4f9b\u5927\u5bb6\u5f71\u7247\u6559\u5b78\u8207\u6587\u5b57\u7248\u7684\u7b46\u8a18\u5167\u5bb9(\u5537\u547c\u66f8\u5546\u5feb\u770b\u904e\u4f86\uff5e)\u7576\u7136\u5167\u5bb9\u6703\u4ee5\u4e4b\u524d\u5f71\u7247\u6559\u5b78\u70ba\u57fa\u5e95\uff0c\u4e26\u52a0\u5165\u4e00\u4e9b\u65b0\u7684\u5143\u7d20\u8b93\u6587\u7ae0\u5167\u5bb9\u8b8a\u5f97\u66f4\u7d2e\u5be6\u3002\u5728\u5168\u65b0\u7684 \u5168\u6c11\u760bAI\u7cfb\u52172.0 \u4e2d\u6211\u6703\u4ecb\u7d39\u5be6\u7528\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4e26\u542b\u6709\u7a0b\u5f0f\u624b\u628a\u624b\u5be6\u4f5c\uff0c\u4ee5\u53ca\u8fd1\u5e74\u4f86\u71b1\u9580\u7684\u6a5f\u5668\u5b78\u7fd2\u5957\u4ef6\u8207\u6a21\u578b\u8abf\u53c3\u6280\u5de7\u3002\u9664\u6b64\u4e4b\u5916\u6211\u9084\u6703\u63d0\u5230\u5927\u5bb6\u6700\u611f\u8208\u8da3\u7684 AI 
\u6a21\u578b\u843d\u5730\u8207\u6574\u5408\u3002\u5e0c\u671b\u5728\u9019\u6b21\u7684\u9435\u4eba\u8cfd\u80fd\u5920\u5c07AI\u7684\u8cc7\u6e90\u6574\u7406\u5f97\u66f4\u8a73\u7d30\u4e26\u5206\u4eab\u7d66\u5404\u4f4d\u3002","title":"\u524d\u8a00"},{"location":"#_2","text":"\u5982\u679c\u60a8\u662f\u4e4b\u524d\u7684\u820a\u8b80\u8005\uff0c\u6b61\u8fce\u56de\u4f86\u70ba\u81ea\u5df1\u5145\u96fb\uff5e\u65b0\u7684\u7cfb\u5217\u6587\u7ae0\u4fdd\u8b49\u8b93\u4f60\u6536\u7a6b\u6eff\u6eff\uff01\u82e5\u60a8\u662f\u65b0\u4f86\u7684\u8b80\u8005\u6b61\u8fce\u52a0\u5165\u4eba\u5de5\u667a\u6167\u7684\u4e16\u754c\uff0c\u6b64\u7cfb\u5217\u6587\u7ae0\u6b63\u9069\u5408\u521d\u5b78\u8005\u95b1\u8b80\u3002\u53e6\u5916\u5efa\u8b70\u53ef\u4ee5\u642d\u914d\u6211 \u4e0a\u4e00\u5c46 \u9435\u4eba\u8cfd\u7684\u5f71\u7247\u6559\u5b78\u9032\u884c\u5b78\u7fd2\u3002","title":"\u6b64\u7cfb\u5217\u6559\u5b78\u9069\u5408\u8ab0?"},{"location":"#_3","text":"\u5728\u672c\u6b21\u9435\u4eba\u8cfd\u9810\u8a08\u65b0\u589e\u4e86\u8a31\u591a\u65b0\u5167\u5bb9\uff0c\u7279\u5225\u662f\u8fd1\u5e74\u4f86\u6bd4\u8f03\u65b0\u7684\u6f14\u7b97\u6cd5\u5957\u4ef6\uff0c\u4ee5\u53ca\u5728\u6a21\u578b\u8a13\u7df4\u4e2d\u5fc5\u9808\u6ce8\u610f\u7684\u5927\u5c0f\u4e8b\u3002\u672c\u7cfb\u5217\u8981\u5728\u77ed\u77ed30\u5929\u5167\u8b1b\u5b8c\u6240\u6709 AI 
\u9818\u57df\u76f8\u95dc\u61c9\u7528\u662f\u4e0d\u592a\u53ef\u80fd\u7684\u4e8b\u60c5\uff0c\u56e0\u6b64\u6211\u7684\u898f\u5283\u662f\u5f9e\u8a8d\u8b58\u4eba\u5de5\u667a\u6167\u958b\u59cb\u5207\u5165\u4e3b\u984c\u3002\u5148\u8b93\u5927\u5bb6\u77e5\u9053\u4f55\u8b02\u4eba\u5de5\u667a\u6167\u4ee5\u53ca\u76f8\u95dc\u61c9\u7528\u6709\u54ea\u4e9b\u3002\u63a5\u8457\u5e36\u5404\u4f4d\u4e86\u89e3\u6210\u70ba\u8cc7\u6599\u79d1\u5b78\u5bb6\u7684\u7b2c\u4e00\u6b65\uff0c\u5c31\u662f\u8cc7\u6599\u5206\u6790\u8207\u8996\u89ba\u5316\uff0c\u518d\u4f86\u6703\u6709\u4e00\u7cfb\u5217\u7d93\u5178\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4ecb\u7d39\u3002\u6700\u5f8c\u4e5f\u662f\u5927\u5bb6\u53ef\u80fd\u6703\u6709\u8208\u8da3\u7684\u6574\u5408\u90e8\u5206\uff0c\u6703\u4ee5\u5be6\u969b\u7684\u5e36\u5927\u5bb6\u624b\u628a\u624b\u90e8\u7f72\u6211\u5011\u7684AI\u6a21\u578b\u4ee5\u53ca\u524d\u5f8c\u7aef\u4e32\u63a5\u7684\u6982\u5ff5\u3002","title":"\u7cfb\u5217\u6587\u7ae0\u5167\u5bb9\u898f\u5283"},{"location":"#_4","text":"\u672c\u7cfb\u5217\u6559\u5b78\u5c07\u6709\u5927\u91cf\u7684\u7a0b\u5f0f\u5be6\u4f5c\uff0c\u4e26\u63a1\u7528 Google Colab \u505a\u70ba\u7a0b\u5f0f\u96f2\u7aef\u904b\u884c\u7684\u7de8\u8f2f\u57f7\u884c\u74b0\u5883\u3002\u5404\u4f4d\u53ef\u4ee5\u76f4\u63a5\u5229\u7528 Colab \u958b\u555f\u672c\u7cfb\u5217\u6587\u7ae0\u7684\u7bc4\u4f8b\u7a0b\u5f0f\u3002\u5728\u4f7f\u7528\u6b64\u5e73\u53f0\u4e4b\u524d\u6bcf\u500b\u4eba\u90fd\u5fc5\u9808\u8981\u6709\u81ea\u5df1\u7684 Google \u5e33\u865f\uff0c\u624d\u80fd\u9806\u5229\u7684\u958b\u555f\u4e26\u57f7\u884c\u7a0b\u5f0f\u78bc\u3002Colab \u53ef\u8b93\u4f60\u8f15\u9b06\u5730\u5728\u700f\u89bd\u5668\u4e0a\u64b0\u5beb\u4e26\u57f7\u884c Python \u7a0b\u5f0f\u8a9e\u8a00\uff0c\u5b83\u53ef\u4ee5\u8aaa\u662f\u6a5f\u5668\u5b78\u7fd2\u65b0\u624b\u7684\u5165\u9580\u5de5\u5177\u3002\u6b64\u5916 Colab \u5177\u5099\u4e86\u4ee5\u4e0b\u5e7e\u500b\u512a\u9ede\uff1a \u4e0d\u5fc5\u9032\u884c\u4efb\u4f55\u8a2d\u5b9a\u8207\u5b89\u88dd 
\u514d\u8cbb\u984d\u5ea6\u4f7f\u7528 GPU\u3001TPU \u8cc7\u6e90 \u8f15\u9b06\u5171\u7528\u8207\u5206\u4eab\u6a94\u6848 \u56e0\u6b64\u8b80\u8005\u5fc5\u9808\u5148\u719f\u6089 Colab \u7684\u64cd\u4f5c\u6a21\u5f0f\uff0c\u60f3\u4e86\u89e3\u8a72\u5982\u4f55\u64cd\u4f5c\u7684\u670b\u53cb\u5011\u53ef\u4ee5\u5148\u4f86\u770b\u9019\u4e00\u6b65 \u5f71\u7247 \u6559\u5b78\u3002\u6216\u662f\u53ef\u4ee5\u95b1\u8b80\u5176\u4ed6\u76f8\u95dc \u6587\u7ae0 \u3002","title":"\u524d\u7f6e\u4f5c\u696d\u8cc7\u6e90"},{"location":"#_5","text":"\u672c\u7cfb\u5217\u6587\u7ae0\u82e5\u6709\u554f\u984c\u6216\u662f\u5167\u5bb9\u5efa\u8b70\u90fd\u53ef\u4ee5\u4f86 GitHub \u4e2d\u7684 issue \u63d0\u51fa\u3002\u6b61\u8fce\u5927\u5bb6\u4e00\u540c\u8ca2\u737b\u70ba\u9019\u7cfb\u5217\u6587\u7ae0\u6709\u66f4\u597d\u7684\u95b1\u8b80\u54c1\u8cea\u3002","title":"\u56de\u5831\u932f\u8aa4\u8207\u5efa\u8b70"},{"location":"#_6","text":"\u66fe\u4efb\u8077\u65bc\u53f0\u7063\u4eba\u5de5\u667a\u6167\u5b78\u6821\uff0c\u64d4\u4efbAI\u5de5\u7a0b\u5e2b\uff0c\u64c1\u6709\u8c50\u5bcc\u7684\u6559\u5b78\u7d93\u9a57\uff0c\u71b1\u8877\u65bc\u7db2\u9801\u524d\u5f8c\u7aef\u6574\u5408\u8207AI\u6f14\u7b97\u6cd5\u7684\u958b\u767c\u3002\u5e0c\u671b\u85c9\u7531\u9435\u4eba\u8cfd\uff0c\u5c07\u6240\u5b78\u8ca2\u737b\u51fa\u4f86\uff0c\u70baAI\u9818\u57df\u63d0\u4f9b\u66f4\u591a\u8cc7\u6e90\u3002 @andy6804tw \u6b61\u8fce\u5927\u5bb6\u8a02\u95b1\u6211\u7684 YouTube \u983b\u9053\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u7c21\u5831 PDF & Code \u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u95dc\u65bc\u4f5c\u8005"},{"location":"1.\u5168\u6c11\u760bAI\u7cfb\u52172.0\u76ee\u6a19\u4ecb\u7d39/","text":"[Day 1] \u76ee\u6a19\u4ecb\u7d39 \u7b2c13\u5c46iT\u90a6\u5e6b\u5fd9\u9435\u4eba\u8cfd \u524d\u8a00 \u54c8\u56c9\u5927\u5bb6\u597d\u6211\u662f10\u7a0b\u5f0f\u4e2d\u768410\uff01\u6211\u662f \u4e0a\u4e00\u5c46 \u9435\u4eba\u8cfd\u5f71\u7247\u6559\u5b78\u7d44 \u5168\u6c11\u760bAI\u7cfb\u5217 
\u7684\u4f5c\u8005\uff0c\u7576\u6642\u8b1b\u89e3\u4e86\u4eba\u5de5\u667a\u6167\u7684\u57fa\u790e\u4ee5\u53ca\u5e38\u898b\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u8207\u624b\u628a\u624b\u6559\u5b78\u3002\u7531\u65bc\u5927\u5bb6\u53cd\u61c9\u5f88\u71b1\u70c8\uff0c\u8b93\u6211\u770b\u5230\u4e86\u5927\u5bb6\u5c0d\u65bcAI\u7684\u5b78\u7fd2\u71b1\u5ff1\u3002\u4e5f\u56e0\u70ba\u4e0a\u4e00\u5c46\u7372\u5f97\u4e86\u5f71\u7247\u6559\u5b78\u7d44\u512a\u9078\uff0c\u6536\u5230\u4e86\u8a31\u591a\u66f8\u5546\u7684\u51fa\u7248\u9080\u8acb\uff0c\u7531\u65bc\u6211\u6c92\u6709\u6642\u9593\u8207\u52d5\u529b\u5c07\u9019\u4e9b\u5927\u91cf\u77e5\u8b58\u5beb\u6210\u6587\u7ae0\u56e0\u6b64\u90fd\u5a49\u62d2\u4e86\u3002\u56e0\u6b64\u6211\u60f3\u85c9\u7531\u9019\u4e00\u6b21\u9435\u4eba\u8cfd\u5c07\u4e0a\u4e00\u5c46\u7684\u5f71\u7247\u5167\u5bb9\u6574\u7406\u6210\u96fb\u5b50\u66f8\u7248\u672c\uff0c\u63d0\u4f9b\u5927\u5bb6\u5f71\u7247\u6559\u5b78\u8207\u6587\u5b57\u7248\u7684\u7b46\u8a18\u5167\u5bb9(\u5537\u547c\u66f8\u5546\u5feb\u770b\u904e\u4f86\uff5e)\u7576\u7136\u5167\u5bb9\u6703\u4ee5\u4e4b\u524d\u5f71\u7247\u6559\u5b78\u70ba\u57fa\u5e95\uff0c\u4e26\u52a0\u5165\u4e00\u4e9b\u65b0\u7684\u5143\u7d20\u8b93\u6587\u7ae0\u5167\u5bb9\u8b8a\u5f97\u66f4\u7d2e\u5be6\u3002\u5728\u5168\u65b0\u7684 \u5168\u6c11\u760bAI\u7cfb\u52172.0 \u4e2d\u6211\u6703\u4ecb\u7d39\u5be6\u7528\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4e26\u542b\u6709\u7a0b\u5f0f\u624b\u628a\u624b\u5be6\u4f5c\uff0c\u4ee5\u53ca\u8fd1\u5e74\u4f86\u71b1\u9580\u7684\u6a5f\u5668\u5b78\u7fd2\u5957\u4ef6\u8207\u6a21\u578b\u8abf\u53c3\u6280\u5de7\u3002\u9664\u6b64\u4e4b\u5916\u6211\u9084\u6703\u63d0\u5230\u5927\u5bb6\u6700\u611f\u8208\u8da3\u7684 AI \u6a21\u578b\u843d\u5730\u8207\u6574\u5408\u3002\u5e0c\u671b\u5728\u9019\u6b21\u7684\u9435\u4eba\u8cfd\u80fd\u5920\u5c07AI\u7684\u8cc7\u6e90\u6574\u7406\u5f97\u66f4\u8a73\u7d30\u4e26\u5206\u4eab\u7d66\u5404\u4f4d\u3002 \u6b64\u7cfb\u5217\u6559\u5b78\u9069\u5408\u8ab0? 
\u5982\u679c\u60a8\u662f\u4e4b\u524d\u7684\u820a\u8b80\u8005\uff0c\u6b61\u8fce\u56de\u4f86\u70ba\u81ea\u5df1\u5145\u96fb\uff5e\u65b0\u7684\u7cfb\u5217\u6587\u7ae0\u4fdd\u8b49\u8b93\u4f60\u6536\u7a6b\u6eff\u6eff\uff01\u82e5\u60a8\u662f\u65b0\u4f86\u7684\u8b80\u8005\u6b61\u8fce\u52a0\u5165\u4eba\u5de5\u667a\u6167\u7684\u4e16\u754c\uff0c\u6b64\u7cfb\u5217\u6587\u7ae0\u6b63\u9069\u5408\u521d\u5b78\u8005\u95b1\u8b80\u3002\u53e6\u5916\u5efa\u8b70\u53ef\u4ee5\u642d\u914d\u6211 \u4e0a\u4e00\u5c46 \u9435\u4eba\u8cfd\u7684\u5f71\u7247\u6559\u5b78\u9032\u884c\u5b78\u7fd2\u3002 \u7cfb\u5217\u6587\u7ae0\u5167\u5bb9\u898f\u5283 \u5728\u672c\u6b21\u9435\u4eba\u8cfd\u9810\u8a08\u65b0\u589e\u4e86\u8a31\u591a\u65b0\u5167\u5bb9\uff0c\u7279\u5225\u662f\u8fd1\u5e74\u4f86\u6bd4\u8f03\u65b0\u7684\u6f14\u7b97\u6cd5\u5957\u4ef6\uff0c\u4ee5\u53ca\u5728\u6a21\u578b\u8a13\u7df4\u4e2d\u5fc5\u9808\u6ce8\u610f\u7684\u5927\u5c0f\u4e8b\u3002\u672c\u7cfb\u5217\u8981\u5728\u77ed\u77ed30\u5929\u5167\u8b1b\u5b8c\u6240\u6709 AI \u9818\u57df\u76f8\u95dc\u61c9\u7528\u662f\u4e0d\u592a\u53ef\u80fd\u7684\u4e8b\u60c5\uff0c\u56e0\u6b64\u6211\u7684\u898f\u5283\u662f\u5f9e\u8a8d\u8b58\u4eba\u5de5\u667a\u6167\u958b\u59cb\u5207\u5165\u4e3b\u984c\u3002\u5148\u8b93\u5927\u5bb6\u77e5\u9053\u4f55\u8b02\u4eba\u5de5\u667a\u6167\u4ee5\u53ca\u76f8\u95dc\u61c9\u7528\u6709\u54ea\u4e9b\u3002\u63a5\u8457\u5e36\u5404\u4f4d\u4e86\u89e3\u6210\u70ba\u8cc7\u6599\u79d1\u5b78\u5bb6\u7684\u7b2c\u4e00\u6b65\uff0c\u5c31\u662f\u8cc7\u6599\u5206\u6790\u8207\u8996\u89ba\u5316\uff0c\u518d\u4f86\u6703\u6709\u4e00\u7cfb\u5217\u7d93\u5178\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4ecb\u7d39\u3002\u6700\u5f8c\u4e5f\u662f\u5927\u5bb6\u53ef\u80fd\u6703\u6709\u8208\u8da3\u7684\u6574\u5408\u90e8\u5206\uff0c\u6703\u4ee5\u5be6\u969b\u7684\u5e36\u5927\u5bb6\u624b\u628a\u624b\u90e8\u7f72\u6211\u5011\u7684AI\u6a21\u578b\u4ee5\u53ca\u524d\u5f8c\u7aef\u4e32\u63a5\u7684\u6982\u5ff5\u3002 \u524d\u7f6e\u4f5c\u696d\u8cc7\u6e90 
\u672c\u7cfb\u5217\u6559\u5b78\u5c07\u6709\u5927\u91cf\u7684\u7a0b\u5f0f\u5be6\u4f5c\uff0c\u4e26\u63a1\u7528 Google Colab \u505a\u70ba\u7a0b\u5f0f\u96f2\u7aef\u904b\u884c\u7684\u7de8\u8f2f\u57f7\u884c\u74b0\u5883\u3002\u5404\u4f4d\u53ef\u4ee5\u76f4\u63a5\u5229\u7528 Colab \u958b\u555f\u672c\u7cfb\u5217\u6587\u7ae0\u7684\u7bc4\u4f8b\u7a0b\u5f0f\u3002\u5728\u4f7f\u7528\u6b64\u5e73\u53f0\u4e4b\u524d\u6bcf\u500b\u4eba\u90fd\u5fc5\u9808\u8981\u6709\u81ea\u5df1\u7684 Google \u5e33\u865f\uff0c\u624d\u80fd\u9806\u5229\u7684\u958b\u555f\u4e26\u57f7\u884c\u7a0b\u5f0f\u78bc\u3002Colab \u53ef\u8b93\u4f60\u8f15\u9b06\u5730\u5728\u700f\u89bd\u5668\u4e0a\u64b0\u5beb\u4e26\u57f7\u884c Python \u7a0b\u5f0f\u8a9e\u8a00\uff0c\u5b83\u53ef\u4ee5\u8aaa\u662f\u6a5f\u5668\u5b78\u7fd2\u65b0\u624b\u7684\u5165\u9580\u5de5\u5177\u3002\u6b64\u5916 Colab \u5177\u5099\u4e86\u4ee5\u4e0b\u5e7e\u500b\u512a\u9ede\uff1a \u4e0d\u5fc5\u9032\u884c\u4efb\u4f55\u8a2d\u5b9a\u8207\u5b89\u88dd \u514d\u8cbb\u984d\u5ea6\u4f7f\u7528 GPU\u3001TPU \u8cc7\u6e90 \u8f15\u9b06\u5171\u7528\u8207\u5206\u4eab\u6a94\u6848 \u56e0\u6b64\u8b80\u8005\u5fc5\u9808\u5148\u719f\u6089 Colab \u7684\u64cd\u4f5c\u6a21\u5f0f\uff0c\u60f3\u4e86\u89e3\u8a72\u5982\u4f55\u64cd\u4f5c\u7684\u670b\u53cb\u5011\u53ef\u4ee5\u5148\u4f86\u770b\u9019\u4e00\u6b65 \u5f71\u7247 \u6559\u5b78\u3002 \u56de\u5831\u932f\u8aa4\u8207\u5efa\u8b70 \u672c\u7cfb\u5217\u6587\u7ae0\u82e5\u6709\u554f\u984c\u6216\u662f\u5167\u5bb9\u5efa\u8b70\u90fd\u53ef\u4ee5\u4f86 GitHub \u4e2d\u7684 issue \u63d0\u51fa\u3002\u6b61\u8fce\u5927\u5bb6\u4e00\u540c\u8ca2\u737b\u70ba\u9019\u7cfb\u5217\u6587\u7ae0\u6709\u66f4\u597d\u7684\u95b1\u8b80\u54c1\u8cea\u3002 \u95dc\u65bc\u4f5c\u8005 
\u66fe\u4efb\u8077\u65bc\u53f0\u7063\u4eba\u5de5\u667a\u6167\u5b78\u6821\uff0c\u64d4\u4efbAI\u5de5\u7a0b\u5e2b\uff0c\u64c1\u6709\u8c50\u5bcc\u7684\u6559\u5b78\u7d93\u9a57\uff0c\u71b1\u8877\u65bc\u7db2\u9801\u524d\u5f8c\u7aef\u6574\u5408\u8207AI\u6f14\u7b97\u6cd5\u7684\u958b\u767c\u3002\u5e0c\u671b\u85c9\u7531\u9435\u4eba\u8cfd\uff0c\u5c07\u6240\u5b78\u8ca2\u737b\u51fa\u4f86\uff0c\u70baAI\u9818\u57df\u63d0\u4f9b\u66f4\u591a\u8cc7\u6e90\u3002 @andy6804tw \u6b61\u8fce\u5927\u5bb6\u8a02\u95b1\u6211\u7684 YouTube \u983b\u9053\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u7c21\u5831 PDF & Code \u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 1] \u76ee\u6a19\u4ecb\u7d39"},{"location":"1.\u5168\u6c11\u760bAI\u7cfb\u52172.0\u76ee\u6a19\u4ecb\u7d39/#day-1","text":"\u7b2c13\u5c46iT\u90a6\u5e6b\u5fd9\u9435\u4eba\u8cfd","title":"[Day 1] \u76ee\u6a19\u4ecb\u7d39"},{"location":"1.\u5168\u6c11\u760bAI\u7cfb\u52172.0\u76ee\u6a19\u4ecb\u7d39/#_1","text":"\u54c8\u56c9\u5927\u5bb6\u597d\u6211\u662f10\u7a0b\u5f0f\u4e2d\u768410\uff01\u6211\u662f \u4e0a\u4e00\u5c46 \u9435\u4eba\u8cfd\u5f71\u7247\u6559\u5b78\u7d44 \u5168\u6c11\u760bAI\u7cfb\u5217 
\u7684\u4f5c\u8005\uff0c\u7576\u6642\u8b1b\u89e3\u4e86\u4eba\u5de5\u667a\u6167\u7684\u57fa\u790e\u4ee5\u53ca\u5e38\u898b\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u8207\u624b\u628a\u624b\u6559\u5b78\u3002\u7531\u65bc\u5927\u5bb6\u53cd\u61c9\u5f88\u71b1\u70c8\uff0c\u8b93\u6211\u770b\u5230\u4e86\u5927\u5bb6\u5c0d\u65bcAI\u7684\u5b78\u7fd2\u71b1\u5ff1\u3002\u4e5f\u56e0\u70ba\u4e0a\u4e00\u5c46\u7372\u5f97\u4e86\u5f71\u7247\u6559\u5b78\u7d44\u512a\u9078\uff0c\u6536\u5230\u4e86\u8a31\u591a\u66f8\u5546\u7684\u51fa\u7248\u9080\u8acb\uff0c\u7531\u65bc\u6211\u6c92\u6709\u6642\u9593\u8207\u52d5\u529b\u5c07\u9019\u4e9b\u5927\u91cf\u77e5\u8b58\u5beb\u6210\u6587\u7ae0\u56e0\u6b64\u90fd\u5a49\u62d2\u4e86\u3002\u56e0\u6b64\u6211\u60f3\u85c9\u7531\u9019\u4e00\u6b21\u9435\u4eba\u8cfd\u5c07\u4e0a\u4e00\u5c46\u7684\u5f71\u7247\u5167\u5bb9\u6574\u7406\u6210\u96fb\u5b50\u66f8\u7248\u672c\uff0c\u63d0\u4f9b\u5927\u5bb6\u5f71\u7247\u6559\u5b78\u8207\u6587\u5b57\u7248\u7684\u7b46\u8a18\u5167\u5bb9(\u5537\u547c\u66f8\u5546\u5feb\u770b\u904e\u4f86\uff5e)\u7576\u7136\u5167\u5bb9\u6703\u4ee5\u4e4b\u524d\u5f71\u7247\u6559\u5b78\u70ba\u57fa\u5e95\uff0c\u4e26\u52a0\u5165\u4e00\u4e9b\u65b0\u7684\u5143\u7d20\u8b93\u6587\u7ae0\u5167\u5bb9\u8b8a\u5f97\u66f4\u7d2e\u5be6\u3002\u5728\u5168\u65b0\u7684 \u5168\u6c11\u760bAI\u7cfb\u52172.0 \u4e2d\u6211\u6703\u4ecb\u7d39\u5be6\u7528\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4e26\u542b\u6709\u7a0b\u5f0f\u624b\u628a\u624b\u5be6\u4f5c\uff0c\u4ee5\u53ca\u8fd1\u5e74\u4f86\u71b1\u9580\u7684\u6a5f\u5668\u5b78\u7fd2\u5957\u4ef6\u8207\u6a21\u578b\u8abf\u53c3\u6280\u5de7\u3002\u9664\u6b64\u4e4b\u5916\u6211\u9084\u6703\u63d0\u5230\u5927\u5bb6\u6700\u611f\u8208\u8da3\u7684 AI 
\u6a21\u578b\u843d\u5730\u8207\u6574\u5408\u3002\u5e0c\u671b\u5728\u9019\u6b21\u7684\u9435\u4eba\u8cfd\u80fd\u5920\u5c07AI\u7684\u8cc7\u6e90\u6574\u7406\u5f97\u66f4\u8a73\u7d30\u4e26\u5206\u4eab\u7d66\u5404\u4f4d\u3002","title":"\u524d\u8a00"},{"location":"1.\u5168\u6c11\u760bAI\u7cfb\u52172.0\u76ee\u6a19\u4ecb\u7d39/#_2","text":"\u5982\u679c\u60a8\u662f\u4e4b\u524d\u7684\u820a\u8b80\u8005\uff0c\u6b61\u8fce\u56de\u4f86\u70ba\u81ea\u5df1\u5145\u96fb\uff5e\u65b0\u7684\u7cfb\u5217\u6587\u7ae0\u4fdd\u8b49\u8b93\u4f60\u6536\u7a6b\u6eff\u6eff\uff01\u82e5\u60a8\u662f\u65b0\u4f86\u7684\u8b80\u8005\u6b61\u8fce\u52a0\u5165\u4eba\u5de5\u667a\u6167\u7684\u4e16\u754c\uff0c\u6b64\u7cfb\u5217\u6587\u7ae0\u6b63\u9069\u5408\u521d\u5b78\u8005\u95b1\u8b80\u3002\u53e6\u5916\u5efa\u8b70\u53ef\u4ee5\u642d\u914d\u6211 \u4e0a\u4e00\u5c46 \u9435\u4eba\u8cfd\u7684\u5f71\u7247\u6559\u5b78\u9032\u884c\u5b78\u7fd2\u3002","title":"\u6b64\u7cfb\u5217\u6559\u5b78\u9069\u5408\u8ab0?"},{"location":"1.\u5168\u6c11\u760bAI\u7cfb\u52172.0\u76ee\u6a19\u4ecb\u7d39/#_3","text":"\u5728\u672c\u6b21\u9435\u4eba\u8cfd\u9810\u8a08\u65b0\u589e\u4e86\u8a31\u591a\u65b0\u5167\u5bb9\uff0c\u7279\u5225\u662f\u8fd1\u5e74\u4f86\u6bd4\u8f03\u65b0\u7684\u6f14\u7b97\u6cd5\u5957\u4ef6\uff0c\u4ee5\u53ca\u5728\u6a21\u578b\u8a13\u7df4\u4e2d\u5fc5\u9808\u6ce8\u610f\u7684\u5927\u5c0f\u4e8b\u3002\u672c\u7cfb\u5217\u8981\u5728\u77ed\u77ed30\u5929\u5167\u8b1b\u5b8c\u6240\u6709 AI 
\u9818\u57df\u76f8\u95dc\u61c9\u7528\u662f\u4e0d\u592a\u53ef\u80fd\u7684\u4e8b\u60c5\uff0c\u56e0\u6b64\u6211\u7684\u898f\u5283\u662f\u5f9e\u8a8d\u8b58\u4eba\u5de5\u667a\u6167\u958b\u59cb\u5207\u5165\u4e3b\u984c\u3002\u5148\u8b93\u5927\u5bb6\u77e5\u9053\u4f55\u8b02\u4eba\u5de5\u667a\u6167\u4ee5\u53ca\u76f8\u95dc\u61c9\u7528\u6709\u54ea\u4e9b\u3002\u63a5\u8457\u5e36\u5404\u4f4d\u4e86\u89e3\u6210\u70ba\u8cc7\u6599\u79d1\u5b78\u5bb6\u7684\u7b2c\u4e00\u6b65\uff0c\u5c31\u662f\u8cc7\u6599\u5206\u6790\u8207\u8996\u89ba\u5316\uff0c\u518d\u4f86\u6703\u6709\u4e00\u7cfb\u5217\u7d93\u5178\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4ecb\u7d39\u3002\u6700\u5f8c\u4e5f\u662f\u5927\u5bb6\u53ef\u80fd\u6703\u6709\u8208\u8da3\u7684\u6574\u5408\u90e8\u5206\uff0c\u6703\u4ee5\u5be6\u969b\u7684\u5e36\u5927\u5bb6\u624b\u628a\u624b\u90e8\u7f72\u6211\u5011\u7684AI\u6a21\u578b\u4ee5\u53ca\u524d\u5f8c\u7aef\u4e32\u63a5\u7684\u6982\u5ff5\u3002","title":"\u7cfb\u5217\u6587\u7ae0\u5167\u5bb9\u898f\u5283"},{"location":"1.\u5168\u6c11\u760bAI\u7cfb\u52172.0\u76ee\u6a19\u4ecb\u7d39/#_4","text":"\u672c\u7cfb\u5217\u6559\u5b78\u5c07\u6709\u5927\u91cf\u7684\u7a0b\u5f0f\u5be6\u4f5c\uff0c\u4e26\u63a1\u7528 Google Colab \u505a\u70ba\u7a0b\u5f0f\u96f2\u7aef\u904b\u884c\u7684\u7de8\u8f2f\u57f7\u884c\u74b0\u5883\u3002\u5404\u4f4d\u53ef\u4ee5\u76f4\u63a5\u5229\u7528 Colab \u958b\u555f\u672c\u7cfb\u5217\u6587\u7ae0\u7684\u7bc4\u4f8b\u7a0b\u5f0f\u3002\u5728\u4f7f\u7528\u6b64\u5e73\u53f0\u4e4b\u524d\u6bcf\u500b\u4eba\u90fd\u5fc5\u9808\u8981\u6709\u81ea\u5df1\u7684 Google \u5e33\u865f\uff0c\u624d\u80fd\u9806\u5229\u7684\u958b\u555f\u4e26\u57f7\u884c\u7a0b\u5f0f\u78bc\u3002Colab \u53ef\u8b93\u4f60\u8f15\u9b06\u5730\u5728\u700f\u89bd\u5668\u4e0a\u64b0\u5beb\u4e26\u57f7\u884c Python \u7a0b\u5f0f\u8a9e\u8a00\uff0c\u5b83\u53ef\u4ee5\u8aaa\u662f\u6a5f\u5668\u5b78\u7fd2\u65b0\u624b\u7684\u5165\u9580\u5de5\u5177\u3002\u6b64\u5916 Colab \u5177\u5099\u4e86\u4ee5\u4e0b\u5e7e\u500b\u512a\u9ede\uff1a 
\u4e0d\u5fc5\u9032\u884c\u4efb\u4f55\u8a2d\u5b9a\u8207\u5b89\u88dd \u514d\u8cbb\u984d\u5ea6\u4f7f\u7528 GPU\u3001TPU \u8cc7\u6e90 \u8f15\u9b06\u5171\u7528\u8207\u5206\u4eab\u6a94\u6848 \u56e0\u6b64\u8b80\u8005\u5fc5\u9808\u5148\u719f\u6089 Colab \u7684\u64cd\u4f5c\u6a21\u5f0f\uff0c\u60f3\u4e86\u89e3\u8a72\u5982\u4f55\u64cd\u4f5c\u7684\u670b\u53cb\u5011\u53ef\u4ee5\u5148\u4f86\u770b\u9019\u4e00\u6b65 \u5f71\u7247 \u6559\u5b78\u3002","title":"\u524d\u7f6e\u4f5c\u696d\u8cc7\u6e90"},{"location":"1.\u5168\u6c11\u760bAI\u7cfb\u52172.0\u76ee\u6a19\u4ecb\u7d39/#_5","text":"\u672c\u7cfb\u5217\u6587\u7ae0\u82e5\u6709\u554f\u984c\u6216\u662f\u5167\u5bb9\u5efa\u8b70\u90fd\u53ef\u4ee5\u4f86 GitHub \u4e2d\u7684 issue \u63d0\u51fa\u3002\u6b61\u8fce\u5927\u5bb6\u4e00\u540c\u8ca2\u737b\u70ba\u9019\u7cfb\u5217\u6587\u7ae0\u6709\u66f4\u597d\u7684\u95b1\u8b80\u54c1\u8cea\u3002","title":"\u56de\u5831\u932f\u8aa4\u8207\u5efa\u8b70"},{"location":"1.\u5168\u6c11\u760bAI\u7cfb\u52172.0\u76ee\u6a19\u4ecb\u7d39/#_6","text":"\u66fe\u4efb\u8077\u65bc\u53f0\u7063\u4eba\u5de5\u667a\u6167\u5b78\u6821\uff0c\u64d4\u4efbAI\u5de5\u7a0b\u5e2b\uff0c\u64c1\u6709\u8c50\u5bcc\u7684\u6559\u5b78\u7d93\u9a57\uff0c\u71b1\u8877\u65bc\u7db2\u9801\u524d\u5f8c\u7aef\u6574\u5408\u8207AI\u6f14\u7b97\u6cd5\u7684\u958b\u767c\u3002\u5e0c\u671b\u85c9\u7531\u9435\u4eba\u8cfd\uff0c\u5c07\u6240\u5b78\u8ca2\u737b\u51fa\u4f86\uff0c\u70baAI\u9818\u57df\u63d0\u4f9b\u66f4\u591a\u8cc7\u6e90\u3002 @andy6804tw \u6b61\u8fce\u5927\u5bb6\u8a02\u95b1\u6211\u7684 YouTube \u983b\u9053\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u7c21\u5831 PDF & Code \u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u95dc\u65bc\u4f5c\u8005"},{"location":"10.KNN/","text":"[Day 10] \u8fd1\u6731\u8005\u8d64\uff0c\u8fd1\u58a8\u8005\u9ed1 - KNN \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 K-\u8fd1\u9130\u6f14\u7b97\u6cd5\u4ecb\u7d39 KNN \u6f14\u7b97\u6cd5\u89e3\u6790 KNN \u65bc\u5206\u985e\u5668\u548c\u8ff4\u6b78\u5668\u7684\u505a\u6cd5 
\u6bd4\u8f03 KNN \u8207 k-means \u5dee\u7570 \u5be6\u4f5c KNN \u5206\u985e\u5668\u8207\u8ff4\u6b78\u5668 \u5be6\u4f5c KNN \u5206\u985e\u5668\uff0c\u89c0\u5bdf\u4e0d\u540c k \u503c\u6703\u5c0d\u5206\u985e\u7d50\u679c\u9020\u6210\u4ec0\u9ebc\u5f71\u97ff \u5be6\u4f5c KNN \u8ff4\u6b78\u8ff4\u5668 \u7bc4\u4f8b\u7a0b\u5f0f KNN(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f KNN(Regression)\uff1a K-\u8fd1\u9130\u6f14\u7b97\u6cd5 (KNN) KNN \u7684\u5168\u540d K Nearest Neighbor \u662f\u5c6c\u65bc\u6a5f\u5668\u5b78\u7fd2\u4e2d\u7684 Supervised learning \u5176\u4e2d\u4e00\u7a2e\u7b97\u6cd5\uff0c\u9867\u540d\u601d\u7fa9\u5c31\u662f k \u500b\u6700\u63a5\u8fd1\u4f60\u7684 \u9130\u5c45 \u3002\u5206\u985e\u7684\u6a19\u6e96\u662f\u7531\u9130\u5c45\u300c\u591a\u6578\u8868\u6c7a\u300d\u6c7a\u5b9a\u7684\u3002\u5728 Sklearn \u4e2d KNN \u53ef\u4ee5\u7528\u4f5c\u5206\u985e\u6216\u8ff4\u6b78\u7684\u6a21\u578b\u3002 KNN \u5206\u985e\u5668 \u5728\u5206\u985e\u554f\u984c\u4e2d KNN \u6f14\u7b97\u6cd5\u63a1\u591a\u6578\u6c7a\u6a19\u6e96\uff0c\u5229\u7528 k \u500b\u6700\u8fd1\u7684\u9130\u5c45\u4f86\u5224\u5b9a\u65b0\u7684\u8cc7\u6599\u662f\u5728\u54ea\u4e00\u7fa4\u3002\u5176\u6f14\u7b97\u6cd5\u6d41\u7a0b\u975e\u5e38\u7c21\u55ae\uff0c\u9996\u5148\u4f7f\u7528\u8005\u5148\u6c7a\u5b9a k \u7684\u5927\u5c0f\u3002\u63a5\u8457\u8a08\u7b97\u76ee\u524d\u8a72\u7b46\u65b0\u7684\u8cc7\u6599\u8207\u9130\u8fd1\u7684\u8cc7\u6599\u9593\u7684\u8ddd\u96e2\u3002\u7b2c\u4e09\u6b65\u627e\u51fa\u8ddf\u81ea\u5df1\u6700\u8fd1\u7684 k \u500b\u9130\u5c45\uff0c\u67e5\u770b\u54ea\u4e00\u7d44\u9130\u5c45\u6578\u91cf\u6700\u591a\uff0c\u5c31\u52a0\u5165\u54ea\u4e00\u7d44\u3002 \u6c7a\u5b9a k \u503c \u6c42\u6bcf\u500b\u9130\u5c45\u8ddf\u81ea\u5df1\u4e4b\u9593\u7684\u8ddd\u96e2 \u627e\u51fa\u8ddf\u81ea\u5df1\u6700\u8fd1\u7684 k \u500b\u9130\u5c45\uff0c\u67e5\u770b\u54ea\u4e00\u7d44\u9130\u5c45\u6578\u91cf\u6700\u591a\uff0c\u5c31\u52a0\u5165\u54ea\u4e00\u7d44 
\u5982\u679c\u9084\u662f\u6c92\u8fa6\u6cd5\u6c7a\u5b9a\u5728\u54ea\u4e00\u7d44\uff0c\u56de\u5230\u7b2c\u4e00\u6b65\u8abf\u6574 k \u503c\uff0c\u518d\u7e7c\u7e8c k \u7684\u5927\u5c0f\u6703\u5f71\u97ff\u6a21\u578b\u6700\u7d42\u7684\u5206\u985e\u7d50\u679c\u3002\u4ee5\u4e0b\u5716\u70ba\u4f8b\uff0c\u5047\u8a2d\u7da0\u8272\u9ede\u662f\u65b0\u7684\u8cc7\u6599\u3002\u7576 k \u7b49\u65bc 3 \u6642\u6703\u641c\u5c0b\u96e2\u7da0\u8272\u9ede\u6700\u8fd1\u7684\u9130\u5c45\uff0c\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u85cd\u8272\u4e09\u89d2\u5f62\u70ba\u9810\u6e2c\u7684\u7d50\u679c\u3002\u7576 k \u8a2d\u70ba 5 \u7684\u6642\u5019\u7d50\u679c\u53c8\u4e0d\u4e00\u6a23\u4e86\uff0c\u6211\u5011\u767c\u73fe\u8ddd\u96e2\u6700\u8fd1\u7684\u4e09\u500b\u9130\u5c45\u70ba\u7d05\u8272\u6b63\u65b9\u5f62\u3002 KNN \u8ff4\u6b78\u5668 KNN \u540c\u6642\u4e5f\u80fd\u904b\u7528\u5728\u8ff4\u6b78\u554f\u984c\u4e0a\u9762\u3002\u8ff4\u6b78\u6a21\u578b\u8f38\u51fa\u7684\u7d50\u679c\u662f\u4e00\u500b\u9023\u7e8c\u6027\u6578\u503c\uff0c\u5176\u9810\u6e2c\u8a72\u503c\u662f k \u500b\u6700\u8fd1\u9130\u5c45\u8f38\u51fa\u7684\u5e73\u5747\u503c\u3002\u4ee5\u4e0b\u5716\u70ba\u4f8b\u7576 k=2 \u6642\uff0c\u5047\u8a2d\u6211\u5011\u6709\u4e00\u500b\u8f38\u5165\u7279\u5fb5 x \u8981\u9810\u6e2c\u7684\u8f38\u51fa\u70ba y\u3002\u7576\u6709\u4e00\u7b46\u65b0\u7684 x \u9032\u4f86\u7684\u6642\u5019\uff0c KNN \u8ff4\u6b78\u5668\u6703\u5c0b\u627e\u9130\u8fd1 2 \u500b x \u7684\u8f38\u51fa\u505a\u5e73\u5747\u7576\u4f5c\u662f\u8a72\u7b46\u8cc7\u6599\u7684\u9810\u6e2c\u7d50\u679c\u3002 KNN \u5ea6\u91cf\u8ddd\u96e2\u7684\u65b9\u6cd5 \u8981\u5224\u65b7\u90a3\u4e9b\u662f\u9130\u5c45\u7684\u8a71\uff0c\u9996\u5148\u8981\u91cf\u5316\u76f8\u4f3c\u5ea6\uff0c\u800c\u6b50\u5e7e\u91cc\u5f97\u8ddd\u96e2 (Euclidean distance) \u662f\u6bd4\u8f03\u5e38\u7528\u7684\u65b9\u6cd5\u4f86\u91cf\u5ea6\u76f8\u4f3c\u5ea6\u3002\u9664\u6b64\u4e4b\u5916\u9084\u6709\u660e\u53ef\u592b\u65af\u57fa\u8ddd\u96e2(Sklearn 
\u9810\u8a2d)\u3001\u66fc\u54c8\u9813\u8ddd\u96e2\u3001\u67f4\u6bd4\u96ea\u592b\u8ddd\u96e2\u3001\u593e\u89d2\u9918\u5f26\u3001\u6f22\u660e\u8ddd\u96e2\u3001\u5091\u5361\u5fb7\u76f8\u4f3c\u4fc2\u6578 \u90fd\u53ef\u4ee5\u8a55\u4f30\u8ddd\u96e2\u7684\u9060\u8fd1\u3002 KNN \u8207 k-means \u52ff\u6df7\u6dc6 KNN \u7684\u7f3a\u9ede\u662f\u5c0d\u8cc7\u6599\u7684\u5c40\u90e8\u7d50\u69cb\u975e\u5e38\u654f\u611f\uff0c\u56e0\u6b64\u8abf\u6574\u9069\u7576\u7684 k \u503c\u6975\u70ba\u91cd\u8981\u3002\u53e6\u5916\u5927\u5bb6\u5f88\u5e38\u5c07 KNN \u8207 K-means \u6df7\u6dc6\uff0c\u96d6\u7136\u5169\u8005\u90fd\u6709 k \u503c\u8981\u8a2d\u5b9a\u4f46\u5176\u5be6\u5169\u8005\u7121\u4efb\u4f55\u95dc\u806f\u3002KNN \u7684 k \u662f\u8a2d\u5b9a\u9130\u5c45\u7684\u6578\u91cf\u63a1\u591a\u6578\u6c7a\u4f5c\u70ba\u8f38\u51fa\u7684\u4f9d\u64da\u3002\u800c K-means \u7684 k \u662f\u8a2d\u5b9a\u96c6\u7fa4\u7684\u985e\u5225\u4e2d\u5fc3\u9ede\u6578\u91cf\u3002 [\u7a0b\u5f0f\u5be6\u4f5c] KNN \u5206\u985e\u5668 \u63a1\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u505a\u70ba\u5206\u985e\u7bc4\u4f8b\uff0c\u4f7f\u7528 Sklearn \u5efa\u7acb k-nearest neighbors(KNN) \u6a21\u578b\u3002\u4ee5\u4e0b\u662f KNN \u5e38\u898b\u7684\u6a21\u578b\u64cd\u4f5c\u53c3\u6578\uff1a Parameters: - n_neighbors: \u8a2d\u5b9a\u9130\u5c45\u7684\u6578\u91cf(k)\uff0c\u9078\u53d6\u6700\u8fd1\u7684k\u500b\u9ede\uff0c\u9810\u8a2d\u70ba5\u3002 - algorithm: \u641c\u5c0b\u6578\u6f14\u7b97\u6cd5{'auto'\uff0c'ball_tree'\uff0c'kd_tree'\uff0c'brute'}\uff0c\u53ef\u9078\u3002 - metric: \u8a08\u7b97\u8ddd\u96e2\u7684\u65b9\u5f0f\uff0c\u9810\u8a2d\u70ba\u6b50\u5e7e\u91cc\u5f97\u8ddd\u96e2\u3002 Attributes: - classes_: \u53d6\u5f97\u985e\u5225\u9663\u5217\u3002 - effective_metric_: \u53d6\u5f97\u8a08\u7b97\u8ddd\u96e2\u7684\u516c\u5f0f\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: 
\u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 from sklearn.neighbors import KNeighborsClassifier # \u5efa\u7acb KNN \u6a21\u578b knnModel = KNeighborsClassifier ( n_neighbors = 3 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b knnModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = knnModel . predict ( X_train ) \u4f7f\u7528Score\u8a55\u4f30\u6a21\u578b \u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb score() \u76f4\u63a5\u8a08\u7b97\u6a21\u578b\u9810\u6e2c\u7684\u6e96\u78ba\u7387\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( '\u8a13\u7df4\u96c6: ' , knnModel . score ( X_train , y_train )) print ( '\u6e2c\u8a66\u96c6: ' , knnModel . score ( X_test , y_test )) \u57f7\u884c\u7d50\u679c\uff1a \u8a13\u7df4\u96c6: 0.9619047619047619 \u6e2c\u8a66\u96c6: 0.9555555555555556 \u6211\u5011\u53ef\u4ee5\u67e5\u770b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5728\u6e2c\u8a66\u96c6\u4e0a\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u4e0b\u5716\u4e2d\u5de6\u908a\u7684\u662f\u6e2c\u8a66\u96c6\u7684\u771f\u5be6\u5206\u985e\uff0c\u53f3\u908a\u7684\u662f\u6a21\u578b\u9810\u6e2c\u7684\u5206\u985e\u7d50\u679c\u3002\u5f9e\u5716\u4e2d\u53ef\u4ee5\u767c\u73fe\u85cd\u8272\u7684 Setosa \u5b8c\u6574\u7684\u88ab\u5206\u985e\u51fa\u4f86\uff0c\u800c\u6a58\u8272\u8207\u7da0\u8272\u7684\u5206\u4f48\u662f\u7dca\u5bc6\u76f8\u9023\u5728\u4ea4\u754c\u8655\u5206\u985e\u7684\u7d50\u679c\u6bd4\u8f03\u4e0d\u7a69\u5b9a\u3002\u4f46\u6700\u7d42\u9810\u6e2c\u7d50\u679c\u7d50\u679c\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u90fd\u6709\u767e\u5206\u4e4b95\u4ee5\u4e0a\u7684\u6e96\u78ba\u7387\u3002 KNN \u8ff4\u6b78\u5668 KNN \u4e0d\u50c5\u80fd\u5920\u4f5c\u70ba\u5206\u985e\u5668\uff0c\u4e5f\u53ef\u4ee5\u505a\u8ff4\u6b78\u9023\u7e8c\u6027\u7684\u6578\u503c\u9810\u6e2c\u3002\u5176\u9810\u6e2c\u503c\u70bak\u500b\u6700\u8fd1\u9130\u5c45\u7684\u503c\u7684\u5e73\u5747\u503c\u3002 Parameters: - n_neighbors: 
\u8a2d\u5b9a\u9130\u5c45\u7684\u6578\u91cf(k)\uff0c\u9078\u53d6\u6700\u8fd1\u7684k\u500b\u9ede\uff0c\u9810\u8a2d\u70ba5\u3002 - algorithm: \u641c\u5c0b\u6578\u6f14\u7b97\u6cd5{'auto'\uff0c'ball_tree'\uff0c'kd_tree'\uff0c'brute'}\uff0c\u53ef\u9078\u3002 - metric: \u8a08\u7b97\u8ddd\u96e2\u7684\u65b9\u5f0f\uff0c\u9810\u8a2d\u70ba\u6b50\u5e7e\u91cc\u5f97\u8ddd\u96e2\u3002 Attributes: - classes_: \u53d6\u5f97\u985e\u5225\u9663\u5217\u3002 - effective_metric_: \u53d6\u5f97\u8a08\u7b97\u8ddd\u96e2\u7684\u516c\u5f0f\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 from sklearn.neighbors import KNeighborsRegressor # \u5efa\u7acb KNN \u6a21\u578b knnModel = KNeighborsRegressor ( n_neighbors = 3 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b knnModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = knnModel . predict ( x ) \u6a21\u578b\u8a55\u4f30 Sklearn \u4e2d KNN \u8ff4\u6b78\u6a21\u578b\u7684 score \u51fd\u5f0f\u662f R2 score\uff0c\u53ef\u4f5c\u70ba\u6a21\u578b\u8a55\u4f30\u4f9d\u64da\uff0c\u5176\u6578\u503c\u8d8a\u63a5\u8fd1\u65bc1\u4ee3\u8868\u6a21\u578b\u8d8a\u4f73\u3002\u9664\u4e86 R2 score \u9084\u6709\u5176\u4ed6\u8a31\u591a\u8ff4\u6b78\u6a21\u578b\u7684\u8a55\u4f30\u65b9\u6cd5\uff0c\u4f8b\u5982\uff1a MSE\u3001MAE\u3001RMSE\u3002 from sklearn import metrics print ( 'R2 score: ' , knnModel . score ( x , y )) mse = metrics . 
mean_squared_error ( y , predicted ) print ( 'MSE score: ' , mse ) \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 10] \u8fd1\u6731\u8005\u8d64\uff0c\u8fd1\u58a8\u8005\u9ed1 - KNN"},{"location":"10.KNN/#day-10-knn","text":"","title":"[Day 10] \u8fd1\u6731\u8005\u8d64\uff0c\u8fd1\u58a8\u8005\u9ed1 - KNN"},{"location":"10.KNN/#_1","text":"K-\u8fd1\u9130\u6f14\u7b97\u6cd5\u4ecb\u7d39 KNN \u6f14\u7b97\u6cd5\u89e3\u6790 KNN \u65bc\u5206\u985e\u5668\u548c\u8ff4\u6b78\u5668\u7684\u505a\u6cd5 \u6bd4\u8f03 KNN \u8207 k-means \u5dee\u7570 \u5be6\u4f5c KNN \u5206\u985e\u5668\u8207\u8ff4\u6b78\u5668 \u5be6\u4f5c KNN \u5206\u985e\u5668\uff0c\u89c0\u5bdf\u4e0d\u540c k \u503c\u6703\u5c0d\u5206\u985e\u7d50\u679c\u9020\u6210\u4ec0\u9ebc\u5f71\u97ff \u5be6\u4f5c KNN \u8ff4\u6b78\u8ff4\u5668 \u7bc4\u4f8b\u7a0b\u5f0f KNN(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f KNN(Regression)\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"10.KNN/#k-knn","text":"KNN \u7684\u5168\u540d K Nearest Neighbor \u662f\u5c6c\u65bc\u6a5f\u5668\u5b78\u7fd2\u4e2d\u7684 Supervised learning \u5176\u4e2d\u4e00\u7a2e\u7b97\u6cd5\uff0c\u9867\u540d\u601d\u7fa9\u5c31\u662f k \u500b\u6700\u63a5\u8fd1\u4f60\u7684 \u9130\u5c45 \u3002\u5206\u985e\u7684\u6a19\u6e96\u662f\u7531\u9130\u5c45\u300c\u591a\u6578\u8868\u6c7a\u300d\u6c7a\u5b9a\u7684\u3002\u5728 Sklearn \u4e2d KNN \u53ef\u4ee5\u7528\u4f5c\u5206\u985e\u6216\u8ff4\u6b78\u7684\u6a21\u578b\u3002","title":"K-\u8fd1\u9130\u6f14\u7b97\u6cd5 (KNN)"},{"location":"10.KNN/#knn","text":"\u5728\u5206\u985e\u554f\u984c\u4e2d KNN \u6f14\u7b97\u6cd5\u63a1\u591a\u6578\u6c7a\u6a19\u6e96\uff0c\u5229\u7528 k \u500b\u6700\u8fd1\u7684\u9130\u5c45\u4f86\u5224\u5b9a\u65b0\u7684\u8cc7\u6599\u662f\u5728\u54ea\u4e00\u7fa4\u3002\u5176\u6f14\u7b97\u6cd5\u6d41\u7a0b\u975e\u5e38\u7c21\u55ae\uff0c\u9996\u5148\u4f7f\u7528\u8005\u5148\u6c7a\u5b9a k 
\u7684\u5927\u5c0f\u3002\u63a5\u8457\u8a08\u7b97\u76ee\u524d\u8a72\u7b46\u65b0\u7684\u8cc7\u6599\u8207\u9130\u8fd1\u7684\u8cc7\u6599\u9593\u7684\u8ddd\u96e2\u3002\u7b2c\u4e09\u6b65\u627e\u51fa\u8ddf\u81ea\u5df1\u6700\u8fd1\u7684 k \u500b\u9130\u5c45\uff0c\u67e5\u770b\u54ea\u4e00\u7d44\u9130\u5c45\u6578\u91cf\u6700\u591a\uff0c\u5c31\u52a0\u5165\u54ea\u4e00\u7d44\u3002 \u6c7a\u5b9a k \u503c \u6c42\u6bcf\u500b\u9130\u5c45\u8ddf\u81ea\u5df1\u4e4b\u9593\u7684\u8ddd\u96e2 \u627e\u51fa\u8ddf\u81ea\u5df1\u6700\u8fd1\u7684 k \u500b\u9130\u5c45\uff0c\u67e5\u770b\u54ea\u4e00\u7d44\u9130\u5c45\u6578\u91cf\u6700\u591a\uff0c\u5c31\u52a0\u5165\u54ea\u4e00\u7d44 \u5982\u679c\u9084\u662f\u6c92\u8fa6\u6cd5\u6c7a\u5b9a\u5728\u54ea\u4e00\u7d44\uff0c\u56de\u5230\u7b2c\u4e00\u6b65\u8abf\u6574 k \u503c\uff0c\u518d\u7e7c\u7e8c k \u7684\u5927\u5c0f\u6703\u5f71\u97ff\u6a21\u578b\u6700\u7d42\u7684\u5206\u985e\u7d50\u679c\u3002\u4ee5\u4e0b\u5716\u70ba\u4f8b\uff0c\u5047\u8a2d\u7da0\u8272\u9ede\u662f\u65b0\u7684\u8cc7\u6599\u3002\u7576 k \u7b49\u65bc 3 \u6642\u6703\u641c\u5c0b\u96e2\u7da0\u8272\u9ede\u6700\u8fd1\u7684\u9130\u5c45\uff0c\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u85cd\u8272\u4e09\u89d2\u5f62\u70ba\u9810\u6e2c\u7684\u7d50\u679c\u3002\u7576 k \u8a2d\u70ba 5 \u7684\u6642\u5019\u7d50\u679c\u53c8\u4e0d\u4e00\u6a23\u4e86\uff0c\u6211\u5011\u767c\u73fe\u8ddd\u96e2\u6700\u8fd1\u7684\u4e09\u500b\u9130\u5c45\u70ba\u7d05\u8272\u6b63\u65b9\u5f62\u3002","title":"KNN \u5206\u985e\u5668"},{"location":"10.KNN/#knn_1","text":"KNN \u540c\u6642\u4e5f\u80fd\u904b\u7528\u5728\u8ff4\u6b78\u554f\u984c\u4e0a\u9762\u3002\u8ff4\u6b78\u6a21\u578b\u8f38\u51fa\u7684\u7d50\u679c\u662f\u4e00\u500b\u9023\u7e8c\u6027\u6578\u503c\uff0c\u5176\u9810\u6e2c\u8a72\u503c\u662f k \u500b\u6700\u8fd1\u9130\u5c45\u8f38\u51fa\u7684\u5e73\u5747\u503c\u3002\u4ee5\u4e0b\u5716\u70ba\u4f8b\u7576 k=2 \u6642\uff0c\u5047\u8a2d\u6211\u5011\u6709\u4e00\u500b\u8f38\u5165\u7279\u5fb5 x \u8981\u9810\u6e2c\u7684\u8f38\u51fa\u70ba 
y\u3002\u7576\u6709\u4e00\u7b46\u65b0\u7684 x \u9032\u4f86\u7684\u6642\u5019\uff0c KNN \u8ff4\u6b78\u5668\u6703\u5c0b\u627e\u9130\u8fd1 2 \u500b x \u7684\u8f38\u51fa\u505a\u5e73\u5747\u7576\u4f5c\u662f\u8a72\u7b46\u8cc7\u6599\u7684\u9810\u6e2c\u7d50\u679c\u3002","title":"KNN \u8ff4\u6b78\u5668"},{"location":"10.KNN/#knn_2","text":"\u8981\u5224\u65b7\u90a3\u4e9b\u662f\u9130\u5c45\u7684\u8a71\uff0c\u9996\u5148\u8981\u91cf\u5316\u76f8\u4f3c\u5ea6\uff0c\u800c\u6b50\u5e7e\u91cc\u5f97\u8ddd\u96e2 (Euclidean distance) \u662f\u6bd4\u8f03\u5e38\u7528\u7684\u65b9\u6cd5\u4f86\u91cf\u5ea6\u76f8\u4f3c\u5ea6\u3002\u9664\u6b64\u4e4b\u5916\u9084\u6709\u660e\u53ef\u592b\u65af\u57fa\u8ddd\u96e2(Sklearn \u9810\u8a2d)\u3001\u66fc\u54c8\u9813\u8ddd\u96e2\u3001\u67f4\u6bd4\u96ea\u592b\u8ddd\u96e2\u3001\u593e\u89d2\u9918\u5f26\u3001\u6f22\u660e\u8ddd\u96e2\u3001\u5091\u5361\u5fb7\u76f8\u4f3c\u4fc2\u6578 \u90fd\u53ef\u4ee5\u8a55\u4f30\u8ddd\u96e2\u7684\u9060\u8fd1\u3002","title":"KNN \u5ea6\u91cf\u8ddd\u96e2\u7684\u65b9\u6cd5"},{"location":"10.KNN/#knn-k-means","text":"KNN \u7684\u7f3a\u9ede\u662f\u5c0d\u8cc7\u6599\u7684\u5c40\u90e8\u7d50\u69cb\u975e\u5e38\u654f\u611f\uff0c\u56e0\u6b64\u8abf\u6574\u9069\u7576\u7684 k \u503c\u6975\u70ba\u91cd\u8981\u3002\u53e6\u5916\u5927\u5bb6\u5f88\u5e38\u5c07 KNN \u8207 K-means \u6df7\u6dc6\uff0c\u96d6\u7136\u5169\u8005\u90fd\u6709 k \u503c\u8981\u8a2d\u5b9a\u4f46\u5176\u5be6\u5169\u8005\u7121\u4efb\u4f55\u95dc\u806f\u3002KNN \u7684 k \u662f\u8a2d\u5b9a\u9130\u5c45\u7684\u6578\u91cf\u63a1\u591a\u6578\u6c7a\u4f5c\u70ba\u8f38\u51fa\u7684\u4f9d\u64da\u3002\u800c K-means \u7684 k \u662f\u8a2d\u5b9a\u96c6\u7fa4\u7684\u985e\u5225\u4e2d\u5fc3\u9ede\u6578\u91cf\u3002","title":"KNN \u8207 k-means \u52ff\u6df7\u6dc6"},{"location":"10.KNN/#_2","text":"","title":"[\u7a0b\u5f0f\u5be6\u4f5c]"},{"location":"10.KNN/#knn_3","text":"\u63a1\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u505a\u70ba\u5206\u985e\u7bc4\u4f8b\uff0c\u4f7f\u7528 Sklearn \u5efa\u7acb k-nearest 
neighbors(KNN) \u6a21\u578b\u3002\u4ee5\u4e0b\u662f KNN \u5e38\u898b\u7684\u6a21\u578b\u64cd\u4f5c\u53c3\u6578\uff1a Parameters: - n_neighbors: \u8a2d\u5b9a\u9130\u5c45\u7684\u6578\u91cf(k)\uff0c\u9078\u53d6\u6700\u8fd1\u7684k\u500b\u9ede\uff0c\u9810\u8a2d\u70ba5\u3002 - algorithm: \u641c\u5c0b\u6578\u6f14\u7b97\u6cd5{'auto'\uff0c'ball_tree'\uff0c'kd_tree'\uff0c'brute'}\uff0c\u53ef\u9078\u3002 - metric: \u8a08\u7b97\u8ddd\u96e2\u7684\u65b9\u5f0f\uff0c\u9810\u8a2d\u70ba\u6b50\u5e7e\u91cc\u5f97\u8ddd\u96e2\u3002 Attributes: - classes_: \u53d6\u5f97\u985e\u5225\u9663\u5217\u3002 - effective_metric_: \u53d6\u5f97\u8a08\u7b97\u8ddd\u96e2\u7684\u516c\u5f0f\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 from sklearn.neighbors import KNeighborsClassifier # \u5efa\u7acb KNN \u6a21\u578b knnModel = KNeighborsClassifier ( n_neighbors = 3 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b knnModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = knnModel . predict ( X_train )","title":"KNN \u5206\u985e\u5668"},{"location":"10.KNN/#score","text":"\u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb score() \u76f4\u63a5\u8a08\u7b97\u6a21\u578b\u9810\u6e2c\u7684\u6e96\u78ba\u7387\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( '\u8a13\u7df4\u96c6: ' , knnModel . score ( X_train , y_train )) print ( '\u6e2c\u8a66\u96c6: ' , knnModel . 
score ( X_test , y_test )) \u57f7\u884c\u7d50\u679c\uff1a \u8a13\u7df4\u96c6: 0.9619047619047619 \u6e2c\u8a66\u96c6: 0.9555555555555556 \u6211\u5011\u53ef\u4ee5\u67e5\u770b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5728\u6e2c\u8a66\u96c6\u4e0a\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u4e0b\u5716\u4e2d\u5de6\u908a\u7684\u662f\u6e2c\u8a66\u96c6\u7684\u771f\u5be6\u5206\u985e\uff0c\u53f3\u908a\u7684\u662f\u6a21\u578b\u9810\u6e2c\u7684\u5206\u985e\u7d50\u679c\u3002\u5f9e\u5716\u4e2d\u53ef\u4ee5\u767c\u73fe\u85cd\u8272\u7684 Setosa \u5b8c\u6574\u7684\u88ab\u5206\u985e\u51fa\u4f86\uff0c\u800c\u6a58\u8272\u8207\u7da0\u8272\u7684\u5206\u4f48\u662f\u7dca\u5bc6\u76f8\u9023\u5728\u4ea4\u754c\u8655\u5206\u985e\u7684\u7d50\u679c\u6bd4\u8f03\u4e0d\u7a69\u5b9a\u3002\u4f46\u6700\u7d42\u9810\u6e2c\u7d50\u679c\u7d50\u679c\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u90fd\u6709\u767e\u5206\u4e4b95\u4ee5\u4e0a\u7684\u6e96\u78ba\u7387\u3002","title":"\u4f7f\u7528Score\u8a55\u4f30\u6a21\u578b"},{"location":"10.KNN/#knn_4","text":"KNN \u4e0d\u50c5\u80fd\u5920\u4f5c\u70ba\u5206\u985e\u5668\uff0c\u4e5f\u53ef\u4ee5\u505a\u8ff4\u6b78\u9023\u7e8c\u6027\u7684\u6578\u503c\u9810\u6e2c\u3002\u5176\u9810\u6e2c\u503c\u70bak\u500b\u6700\u8fd1\u9130\u5c45\u7684\u503c\u7684\u5e73\u5747\u503c\u3002 Parameters: - n_neighbors: \u8a2d\u5b9a\u9130\u5c45\u7684\u6578\u91cf(k)\uff0c\u9078\u53d6\u6700\u8fd1\u7684k\u500b\u9ede\uff0c\u9810\u8a2d\u70ba5\u3002 - algorithm: \u641c\u5c0b\u6578\u6f14\u7b97\u6cd5{'auto'\uff0c'ball_tree'\uff0c'kd_tree'\uff0c'brute'}\uff0c\u53ef\u9078\u3002 - metric: \u8a08\u7b97\u8ddd\u96e2\u7684\u65b9\u5f0f\uff0c\u9810\u8a2d\u70ba\u6b50\u5e7e\u91cc\u5f97\u8ddd\u96e2\u3002 Attributes: - classes_: \u53d6\u5f97\u985e\u5225\u9663\u5217\u3002 - effective_metric_: \u53d6\u5f97\u8a08\u7b97\u8ddd\u96e2\u7684\u516c\u5f0f\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: 
\u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 from sklearn.neighbors import KNeighborsRegressor # \u5efa\u7acb KNN \u6a21\u578b knnModel = KNeighborsRegressor ( n_neighbors = 3 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b knnModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = knnModel . predict ( x )","title":"KNN \u8ff4\u6b78\u5668"},{"location":"10.KNN/#_3","text":"Sklearn \u4e2d KNN \u8ff4\u6b78\u6a21\u578b\u7684 score \u51fd\u5f0f\u662f R2 score\uff0c\u53ef\u4f5c\u70ba\u6a21\u578b\u8a55\u4f30\u4f9d\u64da\uff0c\u5176\u6578\u503c\u8d8a\u63a5\u8fd1\u65bc1\u4ee3\u8868\u6a21\u578b\u8d8a\u4f73\u3002\u9664\u4e86 R2 score \u9084\u6709\u5176\u4ed6\u8a31\u591a\u8ff4\u6b78\u6a21\u578b\u7684\u8a55\u4f30\u65b9\u6cd5\uff0c\u4f8b\u5982\uff1a MSE\u3001MAE\u3001RMSE\u3002 from sklearn import metrics print ( 'R2 score: ' , knnModel . score ( x , y )) mse = metrics . mean_squared_error ( y , predicted ) print ( 'MSE score: ' , mse ) \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u6a21\u578b\u8a55\u4f30"},{"location":"11.SVM/","text":"[Day 11] \u6838\u6a21\u578b - \u652f\u6301\u5411\u91cf\u6a5f (SVM) \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 SVM \u5206\u985e\u5668 \u4f55\u8b02\u652f\u6301\u5411\u91cf\u6a5f? \u975e\u7dda\u6027\u8207\u7dda\u6027? 
\u591a\u5143\u5206\u985e\u652f\u6301\u5411\u91cf\u6a5f\u3002 SVR \u8ff4\u6b78\u5668 \u5b78\u7fd2 SVR \u65b9\u6cd5\u5982\u4f55\u8655\u7406\u9023\u7e8c\u6027\u8f38\u51fa\u3002 SVM \u5206\u985e\u5668\u8207 SVR \u8ff4\u6b78\u5668\u624b\u628a\u624b\u5be6\u4f5c \u85c9\u7531\u5716\u5f62\u5316\u7684\u908a\u754c\uff0c\u4f86\u4e86\u89e3\u4f7f\u7528\u4e0d\u540c\u7684 Kernel \u53ca\u4e0d\u540c\u53c3\u6578\u7684\u610f\u7fa9\u3002 \u67e5\u770b SVR \u65b9\u6cd5\u5728\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u548c\u975e\u7dda\u6027\u8ff4\u6b78\u8868\u73fe\u3002 \u7bc4\u4f8b\u7a0b\u5f0f SVM(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f SVR(Regression)\uff1a SVM \u5206\u985e\u5668 \u652f\u6301\u5411\u91cf\u6a5f (support vector machine, SVM) \u662f\u4e00\u500b\u57fa\u65bc\u7d71\u8a08\u5b78\u7fd2\u7684\u76e3\u7763\u5f0f\u6f14\u7b97\u6cd5\uff0c\u900f\u904e\u627e\u51fa\u4e00\u500b\u8d85\u5e73\u9762\uff0c\u4f7f\u4e4b\u5c07\u5169\u500b\u4e0d\u540c\u7684\u96c6\u5408\u5206\u958b\u3002\u4e00\u822c\u7684\u5206\u985e\u554f\u984c\u6211\u5011\u5c31\u662f\u8981\uff0c\u627e\u51fa\u5728\u4e0d\u540c\u7684\u8cc7\u6599\u985e\u5225\u4e2d\u7684\u5206\u9694\u7dda\u3002\u4f46\u5728\u4e00\u822c\u72c0\u6cc1\u4e0b\u9019\u500b\u5206\u9694\u7dda\u975e\u5e38\u8907\u96dc\u4e14\u6709\u5f88\u591a\u7a2e\u53ef\u80fd\u3002\u7136\u800c SVM \u5c31\u662f\u8981\u5728\u9019\u5f88\u591a\u7a2e\u7684\u53ef\u80fd\u7576\u4e2d\u627e\u51fa\u6700\u4f73\u7684\u89e3\u3002SVM \u6f14\u7b97\u6cd5\u7684\u7cbe\u795e\u5c31\u662f\u627e\u51fa\u4e00\u689d\u5206\u9694\u7dda\u4f7f\u6240\u6709\u5728\u908a\u754c\u4e0a\u7684\u9ede\u96e2\u5f97\u8d8a\u9060\u8d8a\u597d\uff0c\u4f7f\u6a21\u578b\u62b5\u6297\u96dc\u8a0a\u7684\u80fd\u529b\u66f4\u4f73\u3002 SVM \u53ef\u5206\u70ba\u4ee5\u4e0b\u5169\u7a2e\uff1a - \u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f - \u975e\u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f \u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f 
\u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f\u5c31\u662f\u5728\u4e0b\u5716\u7bc4\u4f8b\u7684\u4e8c\u7dad\u5716\u5f62\u4e2d\u627e\u51fa\u4e00\u689d\u7dda\uff0c\u76ee\u6a19\u8b93\u9019\u689d\u76f4\u7dda\u8207\u5169\u500b\u985e\u5225\u4e4b\u9593\u7684\u9593\u9694\u5bec\u5ea6\u8ddd\u96e2\u6700\u5927\u5316\u3002\u5176\u4e2d\u96e2\u5169\u689d\u865b\u7dda(\u9593\u9694\u8d85\u5e73\u9762)\u8ddd\u96e2\u6700\u8fd1\u7684\u9ede\uff0c\u5c31\u7a31\u70ba\u652f\u6301\u5411\u91cf (support vector)\u3002 \u7576\u7136\u73fe\u5be6\u751f\u6d3b\u4e2d\u7684\u8cc7\u6599\u5f80\u5f80\u7a0d\u5fae\u8907\u96dc\uff0c\u90a3\u5982\u679c\u4e0d\u662f\u7dda\u6027\u53ef\u5206\u96c6\u5408\u600e\u9ebc\u8fa6\u5462\uff1f\u6211\u5011\u53ef\u4ee5\u904b\u7528\u6838\u51fd\u6578(kernel function) \u5e6b\u6211\u5011\u9020\u51fa\u4e0d\u53ef\u5206\u7684\u5206\u5272\u5e73\u9762\u3002 \u975e\u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f \u9664\u4e86\u9032\u884c\u7dda\u6027\u5206\u985e\u4e4b\u5916 SVM \u9084\u53ef\u4ee5\u4f7f\u7528\u6838\u6280\u5de7\u6709\u6548\u5730\u9032\u884c\u975e\u7dda\u6027\u5206\u985e\uff0c\u5c07\u5176\u8f38\u5165\u7684\u8cc7\u6599\u6295\u5230\u66f4\u9ad8\u7dad\u5ea6\u7684\u7a7a\u9593\uff0c\u4e26\u5728\u9ad8\u7dad\u5ea6\u7684\u7a7a\u9593\u9032\u884c\u9ad8\u7dad\u5ea6\u7684\u5206\u985e\u6216\u964d\u7dad\u3002\u7c21\u55ae\u4f86\u8aaa\u900f\u904e\u591a\u7dad\u5ea6\u7684\u6295\u5f71\u6280\u5de7\uff0c\u5c07\u539f\u672c\u5728\u4e8c\u7dad\u7a7a\u9593\u4e2d\u4e0d\u53ef\u5206\u7684\u9ede\u5230\u4e86\u4e09\u7dad\u7a7a\u9593\u5c31\u53ef\u5206\u4e86\u3002\u4f46\u662f\u96a8\u8457\u8cc7\u6599\u91cf\u589e\u52a0\u5176\u904b\u7b97\u4e5f\u6703\u8b8a\u591a\uff0c\u76f8\u5c0d\u7684\u57f7\u884c\u901f\u5ea6\u5c31\u6703\u8b8a\u6162\u3002 \u5169\u500b\u975e\u7dda\u6027\u7684 Kernel\uff1a - Polynomial \u9ad8\u6b21\u65b9\u8f49\u63db - Radial Basis Function \u9ad8\u65af\u8f49\u63db \u591a\u5143\u5206\u985e\u652f\u6301\u5411\u91cf\u6a5f SVM 
\u6f14\u7b97\u6cd5\u6700\u521d\u662f\u70ba\u4e8c\u5143\u5206\u985e\u554f\u984c\u6240\u8a2d\u8a08\u7684\uff0c\u4f46\u662f\u73fe\u5be6\u751f\u6d3b\u4e2d\u7684\u4f8b\u5b50\u4e00\u5b9a\u4e0d\u53ea\u6709\u5169\u985e\u7684\u554f\u984c\u8981\u89e3\u6c7a\u3002\u4ed6\u7684\u89e3\u6c7a\u65b9\u5f0f\u8207 [Day 9 \u908f\u8f2f\u8ff4\u6b78] \u6240\u63d0\u5230\u7684\u591a\u5143\u5206\u985e\u908f\u8f2f\u8ff4\u6b78\u662f\u4e00\u6a23\u7684\u3002\u4e3b\u8981\u662f\u5c07\u4e00\u500b\u591a\u5143\u5206\u985e\u554f\u984c\u8f49\u63db\u70ba\u591a\u500b\u4e8c\u5143\u5206\u985e\u554f\u984c\u3002\u5e38\u898b\u65b9\u6cd5\u5305\u62ec one-vs-rest(OvR) \u548c many-vs-many(MvM) \u5169\u7a2e\u3002 one-vs-rest(OvR) \u5c07\u67d0\u500b\u985e\u5225\u7684\u6a23\u672c\u6b78\u70ba\u4e00\u985e\uff0c\u5176\u4ed6\u5269\u9918\u7684\u6a23\u672c\u6b78\u70ba\u53e6\u4e00\u985e many-vs-many(MvM) \u5728\u4efb\u610f\u5169\u985e\u6a23\u672c\u4e4b\u9593\u8a2d\u8a08\u4e00\u500b SVM \u8a73\u7d30\u4ecb\u7d39\u53ef\u4ee5\u53c3\u8003 [Day 9 \u908f\u8f2f\u8ff4\u6b78] SVR \u8ff4\u6b78\u5668 \u652f\u6301\u5411\u91cf\u6a5f\uff08SVM\uff09\u662f\u5c08\u9580\u8655\u7406\u5206\u985e\u7684\u554f\u984c\uff0c\u9084\u6709\u53e6\u4e00\u500b\u540d\u8a5e\u7a31\u70ba\u652f\u6301\u5411\u91cf\u8ff4\u6b78\uff08Support Vector Regression, SVR\uff09\u5c08\u9580\u8655\u7406\u8ff4\u6b78\u554f\u984c\u3002SVR \u662f SVM \u7684\u5ef6\u4f38\uff0c\u800c\u652f\u6301\u5411\u91cf\u8ff4\u6b78\u53ea\u8981 f(x) \u8207 y \u504f\u96e2\u7a0b\u5ea6\u4e0d\u8981\u592a\u5927\uff0c\u65e2\u53ef\u4ee5\u8a8d\u70ba\u9810\u6e2c\u6b63\u78ba\u3002\u5982\u4e0b\u5716\u4e2d\u7684\u8ff4\u6b78\u7bc4\u4f8b\uff0c\u5728\u7dda\u6027\u7684 SVR \u6a21\u578b\u4e2d\u6703\u5728\u5de6\u53f3\u52a0\u4e0a \ud835\udf00 \u4f5c\u70ba\u6a21\u578b\u5bb9\u5fcd\u7684\u5340\u9593\u3002\u56e0\u6b64\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u53ea\u6709\u5728\u865b\u7dda\u4ee5\u5916\u7684\u8aa4\u5dee\u624d\u6703\u88ab\u8a08\u7b97\u3002\u6b64\u5916 SVR 
\u4e5f\u63d0\u4f9b\u4e86\u7dda\u6027\u8207\u975e\u7dda\u6027\u7684\u6838\u6280\u5de7\uff0c\u5176\u4e2d\u5728\u975e\u7dda\u6027\u7684\u6a21\u578b\u4e2d\u53ef\u4ee5\u4f7f\u7528\u9ad8\u6b21\u65b9\u8f49\u63db\u6216\u662f\u9ad8\u65af\u8f49\u63db\u3002 [\u7a0b\u5f0f\u5be6\u4f5c] \u652f\u6301\u5411\u91cf\u6a5f (Support Vector Machine, SVM) \u6a21\u578b SVM \u80fd\u5920\u900f\u904e\u8d85\u53c3\u6578 C \u4f86\u9054\u5230 weight regularization \u4f86\u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\u3002\u9664\u4e86\u9019\u9ede\u6211\u5011\u9084\u80fd\u900f\u904e SVM \u7684 Kernel trick \u7684\u65b9\u5f0f\u5c07\u8cc7\u6599\u505a\u975e\u7dda\u6027\u8f49\u63db\uff0c\u5e38\u898b\u7684 kernel \u9664\u4e86 linear \u7dda\u6027\u4ee5\u5916\u9084\u6709\u5169\u4e86\u975e\u7dda\u6027\u7684 Polynomial \u9ad8\u6b21\u65b9\u8f49\u63db\u4ee5\u53ca Radial Basis Function \u9ad8\u65af\u8f49\u63db\u3002 \u56db\u7a2e\u4e0d\u540cSVC\u5206\u985e\u5668: 1. LinearSVC (\u7dda\u6027) 2. kernel='linear' (\u7dda\u6027) 3. kernel='poly' (\u975e\u7dda\u6027) 4. kernel='rbf' (\u975e\u7dda\u6027) Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 LinearSVC Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - max_iter: \u6700\u5927\u8fed\u4ee3\u6b21\u6578\uff0c\u9810\u8a2d1000\u3002 from sklearn import svm # \u5efa\u7acb linearSvc \u6a21\u578b linearSvcModel = svm . LinearSVC ( C = 1 , max_iter = 10000 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b linearSvcModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = linearSvcModel . predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = linearSvcModel . 
score ( train_reduced , y_train ) \u8a13\u7df4\u96c6 Accuracy: 0.96 kernel='linear' Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528\u7dda\u6027\u3002 from sklearn import svm # \u5efa\u7acb kernel='linear' \u6a21\u578b svcModel = svm . SVC ( kernel = 'linear' , C = 1 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b svcModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = svcModel . predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = svcModel . score ( train_reduced , y_train ) \u8a13\u7df4\u96c6 Accuracy: 0.97 kernel='poly' Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528 Polynomial \u9ad8\u6b21\u65b9\u8f49\u63db\u3002 - degree: \u589e\u52a0\u6a21\u578b\u8907\u96dc\u5ea6\uff0c3 \u4ee3\u8868\u8f49\u63db\u5230\u4e09\u6b21\u7a7a\u9593\u9032\u884c\u5206\u985e\u3002 - gamma: \u6578\u503c\u8d8a\u5927\u8d8a\u80fd\u505a\u8907\u96dc\u7684\u5206\u985e\u908a\u754c\u3002 from sklearn import svm # \u5efa\u7acb kernel='poly' \u6a21\u578b polyModel = svm . SVC ( kernel = 'poly' , degree = 3 , gamma = 'auto' , C = 1 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b polyModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = polyModel . predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = polyModel . 
score ( train_reduced , y_train ) \u8a13\u7df4\u96c6 Accuracy: 0.97 kernel='rbf' Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528 Radial Basis Function \u9ad8\u65af\u8f49\u63db\u3002 - gamma: \u6578\u503c\u8d8a\u5927\u8d8a\u80fd\u505a\u8907\u96dc\u7684\u5206\u985e\u908a\u754c from sklearn import svm # \u5efa\u7acb kernel='rbf' \u6a21\u578b rbfModel = svm . SVC ( kernel = 'rbf' , gamma = 0.7 , C = 1 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b rbfModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = rbfModel . predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = rbfModel . score ( train_reduced , y_train ) \u8a13\u7df4\u96c6 Accuracy: 0.97 \u6211\u5011\u85c9\u7531\u5716\u5f62\u5316\u7684\u908a\u754c\uff0c\u4f86\u4e86\u89e3\u4f7f\u7528\u4e0d\u540c\u7684 Kernel \u53ca\u4e0d\u540c\u53c3\u6578\u7684\u610f\u7fa9\u3002\u4ee5\u4e0b\u7bc4\u4f8b\u5c07\u539f\u5148 \u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u56db\u500b\u7279\u5fb5\u900f\u904e PCA \u964d\u6210\u4e8c\u7dad\uff0c\u4ee5\u5229\u6211\u5011\u505a\u8996\u89ba\u5316\u89c0\u5bdf\u3002\u900f\u904e\u56db\u7a2e\u4e0d\u540c\u7684 SVC \u5be6\u9a57\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u4e0d\u540c\u7684\u6838\u6280\u5de7\u6240\u9810\u6e2c\u51fa\u4f86\u7684\u6c7a\u7b56\u908a\u7dda\u90fd\u4e0d\u76e1\u76f8\u540c\u3002\u7136\u800c\u8d8a\u8907\u96dc\u7684\u6a21\u578b\u76f8\u5c0d\u7684\u908a\u754c\u5c31\u6703\u8b8a\u5f97\u8d8a\u626d\u66f2\uff0c\u56e0\u70ba\u975e\u7dda\u6027\u7684\u6a21\u578b\u80fd\u5920\u6709\u6bd4\u8f03\u597d\u7684\u64ec\u5408\u4f7f\u5f97\u932f\u8aa4\u7387\u964d\u4f4e\u3002 \u652f\u6301\u5411\u91cf\u8ff4\u6b78\uff08Support Vector Regression, SVR\uff09 \u6a21\u578b \u5728 Sklearn \u4e2d SVM \u63d0\u4f9b\u8ff4\u6b78\u7684\u6a21\u578b\u7a31\u4f5c SVR\u3002\u6b64\u5916 SVR 
\u8ff4\u6b78\u5668\u4e5f\u63d0\u4f9b\u4e86\u4e09\u7a2e\u4e0d\u540c\u7684\u6838\u51fd\u6578\uff0c\u5206\u5225\u6709\u4e00\u500b\u7dda\u6027\u4ee5\u53ca\u5169\u500b\u975e\u7dda\u6027\u7684\u6a21\u578b\u53ef\u4ee5\u547c\u53eb\u3002\u5728 SVR \u8ff4\u6b78\u7684\u5be6\u9a57\uff0c\u6211\u5011\u62ff\u4e00\u7d44\u975e\u7dda\u6027\u7684\u8cc7\u6599\u4f5c\u70ba\u4f8b\u5b50\u3002\u4e26\u67e5\u770b\u5728\u4e0d\u540c\u7684\u6838\u6280\u5de7\u4e0b\u6a21\u578b\u6240\u64ec\u5408\u7684\u6210\u6548\u70ba\u4f55\uff1f \u4e09\u7a2e\u4e0d\u540cSVR\u8ff4\u6b78\u5668: 1. kernel='linear' (\u7dda\u6027) 2. kernel='poly' (\u975e\u7dda\u6027) 3. kernel='rbf' (\u975e\u7dda\u6027) Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 kernel='linear' Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528\u7dda\u6027\u3002 from sklearn import svm # \u5efa\u7acb kernel='linear' \u6a21\u578b linearModel = svm . SVR ( C = 1 , kernel = 'linear' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b linearModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = linearModel . predict ( x_test ) \u8a13\u7df4\u96c6 MSE: 5.903802524650818 kernel='poly' Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528 Polynomial \u9ad8\u6b21\u65b9\u8f49\u63db\u3002 - degree: \u589e\u52a0\u6a21\u578b\u8907\u96dc\u5ea6\uff0c3 \u4ee3\u8868\u8f49\u63db\u5230\u4e09\u6b21\u7a7a\u9593\u9032\u884c\u5206\u985e\u3002 - gamma: \u6578\u503c\u8d8a\u5927\u8d8a\u80fd\u505a\u8907\u96dc\u7684\u9810\u6e2c\u3002 from sklearn import svm # \u5efa\u7acb kernel='poly' \u6a21\u578b polyModel = svm . 
SVR ( C = 6 , kernel = 'poly' , degree = 3 , gamma = 'auto' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b polyModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = polyModel . predict ( x_test ) \u8a13\u7df4\u96c6 MSE: 8.296270605383441 kernel='rbf' Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528 Radial Basis Function \u9ad8\u65af\u8f49\u63db\u3002 - gamma: \u6578\u503c\u8d8a\u5927\u8d8a\u80fd\u505a\u8907\u96dc\u7684\u5206\u985e\u908a\u754c\u3002 from sklearn import svm # \u5efa\u7acb kernel='rbf' \u6a21\u578b rbfModel = svm . SVR ( C = 6 , kernel = 'rbf' , gamma = 'auto' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b rbfModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = rbfModel . predict ( x_test ) \u8a13\u7df4\u96c6 MSE: 2.2551572190243157 \u9019\u88e1\u7684\u8ff4\u6b78\u6a21\u578b\u63a1\u7528\u975e\u7dda\u6027\u7684\u8cc7\u6599\u9032\u884c\u6578\u64da\u64ec\u5408\u7684\u5be6\u9a57\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7dda\u6027\u7684\u6838\u51fd\u6578\u7121\u6cd5\u6709\u6548\u7684\u9810\u6e2c\u6240\u6709\u6578\u64da\u9ede\u7684\u8da8\u52e2\u3002\u800c\u975e\u7dda\u6027\u7684\u6a21\u578b\u4e2d RBF \u7684\u6a21\u578b\u5c0d\u65bc\u6b64\u8cc7\u6599\u6709\u6bd4\u8f03\u597d\u7684\u9810\u6e2c\u7d50\u679c\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 11] \u6838\u6a21\u578b - \u652f\u6301\u5411\u91cf\u6a5f (SVM)"},{"location":"11.SVM/#day-11-svm","text":"","title":"[Day 11] \u6838\u6a21\u578b - \u652f\u6301\u5411\u91cf\u6a5f (SVM)"},{"location":"11.SVM/#_1","text":"SVM \u5206\u985e\u5668 \u4f55\u8b02\u652f\u6301\u5411\u91cf\u6a5f? \u975e\u7dda\u6027\u8207\u7dda\u6027? 
\u591a\u5143\u5206\u985e\u652f\u6301\u5411\u91cf\u6a5f\u3002 SVR \u8ff4\u6b78\u5668 \u5b78\u7fd2 SVR \u65b9\u6cd5\u5982\u4f55\u8655\u7406\u9023\u7e8c\u6027\u8f38\u51fa\u3002 SVM \u5206\u985e\u5668\u8207 SVR \u8ff4\u6b78\u5668\u624b\u628a\u624b\u5be6\u4f5c \u85c9\u7531\u5716\u5f62\u5316\u7684\u908a\u754c\uff0c\u4f86\u4e86\u89e3\u4f7f\u7528\u4e0d\u540c\u7684 Kernel \u53ca\u4e0d\u540c\u53c3\u6578\u7684\u610f\u7fa9\u3002 \u67e5\u770b SVR \u65b9\u6cd5\u5728\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u548c\u975e\u7dda\u6027\u8ff4\u6b78\u8868\u73fe\u3002 \u7bc4\u4f8b\u7a0b\u5f0f SVM(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f SVR(Regression)\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"11.SVM/#svm","text":"\u652f\u6301\u5411\u91cf\u6a5f (support vector machine, SVM) \u662f\u4e00\u500b\u57fa\u65bc\u7d71\u8a08\u5b78\u7fd2\u7684\u76e3\u7763\u5f0f\u6f14\u7b97\u6cd5\uff0c\u900f\u904e\u627e\u51fa\u4e00\u500b\u8d85\u5e73\u9762\uff0c\u4f7f\u4e4b\u5c07\u5169\u500b\u4e0d\u540c\u7684\u96c6\u5408\u5206\u958b\u3002\u4e00\u822c\u7684\u5206\u985e\u554f\u984c\u6211\u5011\u5c31\u662f\u8981\uff0c\u627e\u51fa\u5728\u4e0d\u540c\u7684\u8cc7\u6599\u985e\u5225\u4e2d\u7684\u5206\u9694\u7dda\u3002\u4f46\u5728\u4e00\u822c\u72c0\u6cc1\u4e0b\u9019\u500b\u5206\u9694\u7dda\u975e\u5e38\u8907\u96dc\u4e14\u6709\u5f88\u591a\u7a2e\u53ef\u80fd\u3002\u7136\u800c SVM \u5c31\u662f\u8981\u5728\u9019\u5f88\u591a\u7a2e\u7684\u53ef\u80fd\u7576\u4e2d\u627e\u51fa\u6700\u4f73\u7684\u89e3\u3002SVM \u6f14\u7b97\u6cd5\u7684\u7cbe\u795e\u5c31\u662f\u627e\u51fa\u4e00\u689d\u5206\u9694\u7dda\u4f7f\u6240\u6709\u5728\u908a\u754c\u4e0a\u7684\u9ede\u96e2\u5f97\u8d8a\u9060\u8d8a\u597d\uff0c\u4f7f\u6a21\u578b\u62b5\u6297\u96dc\u8a0a\u7684\u80fd\u529b\u66f4\u4f73\u3002 SVM \u53ef\u5206\u70ba\u4ee5\u4e0b\u5169\u7a2e\uff1a - \u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f - \u975e\u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f","title":"SVM 
\u5206\u985e\u5668"},{"location":"11.SVM/#_2","text":"\u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f\u5c31\u662f\u5728\u4e0b\u5716\u7bc4\u4f8b\u7684\u4e8c\u7dad\u5716\u5f62\u4e2d\u627e\u51fa\u4e00\u689d\u7dda\uff0c\u76ee\u6a19\u8b93\u9019\u689d\u76f4\u7dda\u8207\u5169\u500b\u985e\u5225\u4e4b\u9593\u7684\u9593\u9694\u5bec\u5ea6\u8ddd\u96e2\u6700\u5927\u5316\u3002\u5176\u4e2d\u96e2\u5169\u689d\u865b\u7dda(\u9593\u9694\u8d85\u5e73\u9762)\u8ddd\u96e2\u6700\u8fd1\u7684\u9ede\uff0c\u5c31\u7a31\u70ba\u652f\u6301\u5411\u91cf (support vector)\u3002 \u7576\u7136\u73fe\u5be6\u751f\u6d3b\u4e2d\u7684\u8cc7\u6599\u5f80\u5f80\u7a0d\u5fae\u8907\u96dc\uff0c\u90a3\u5982\u679c\u4e0d\u662f\u7dda\u6027\u53ef\u5206\u96c6\u5408\u600e\u9ebc\u8fa6\u5462\uff1f\u6211\u5011\u53ef\u4ee5\u904b\u7528\u6838\u51fd\u6578(kernel function) \u5e6b\u6211\u5011\u9020\u51fa\u4e0d\u53ef\u5206\u7684\u5206\u5272\u5e73\u9762\u3002","title":"\u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f"},{"location":"11.SVM/#_3","text":"\u9664\u4e86\u9032\u884c\u7dda\u6027\u5206\u985e\u4e4b\u5916 SVM \u9084\u53ef\u4ee5\u4f7f\u7528\u6838\u6280\u5de7\u6709\u6548\u5730\u9032\u884c\u975e\u7dda\u6027\u5206\u985e\uff0c\u5c07\u5176\u8f38\u5165\u7684\u8cc7\u6599\u6295\u5230\u66f4\u9ad8\u7dad\u5ea6\u7684\u7a7a\u9593\uff0c\u4e26\u5728\u9ad8\u7dad\u5ea6\u7684\u7a7a\u9593\u9032\u884c\u9ad8\u7dad\u5ea6\u7684\u5206\u985e\u6216\u964d\u7dad\u3002\u7c21\u55ae\u4f86\u8aaa\u900f\u904e\u591a\u7dad\u5ea6\u7684\u6295\u5f71\u6280\u5de7\uff0c\u5c07\u539f\u672c\u5728\u4e8c\u7dad\u7a7a\u9593\u4e2d\u4e0d\u53ef\u5206\u7684\u9ede\u5230\u4e86\u4e09\u7dad\u7a7a\u9593\u5c31\u53ef\u5206\u4e86\u3002\u4f46\u662f\u96a8\u8457\u8cc7\u6599\u91cf\u589e\u52a0\u5176\u904b\u7b97\u4e5f\u6703\u8b8a\u591a\uff0c\u76f8\u5c0d\u7684\u57f7\u884c\u901f\u5ea6\u5c31\u6703\u8b8a\u6162\u3002 \u5169\u500b\u975e\u7dda\u6027\u7684 Kernel\uff1a - Polynomial \u9ad8\u6b21\u65b9\u8f49\u63db - Radial Basis Function 
\u9ad8\u65af\u8f49\u63db","title":"\u975e\u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f"},{"location":"11.SVM/#_4","text":"SVM \u6f14\u7b97\u6cd5\u6700\u521d\u662f\u70ba\u4e8c\u5143\u5206\u985e\u554f\u984c\u6240\u8a2d\u8a08\u7684\uff0c\u4f46\u662f\u73fe\u5be6\u751f\u6d3b\u4e2d\u7684\u4f8b\u5b50\u4e00\u5b9a\u4e0d\u53ea\u6709\u5169\u985e\u7684\u554f\u984c\u8981\u89e3\u6c7a\u3002\u4ed6\u7684\u89e3\u6c7a\u65b9\u5f0f\u8207 [Day 9 \u908f\u8f2f\u8ff4\u6b78] \u6240\u63d0\u5230\u7684\u591a\u5143\u5206\u985e\u908f\u8f2f\u8ff4\u6b78\u662f\u4e00\u6a23\u7684\u3002\u4e3b\u8981\u662f\u5c07\u4e00\u500b\u591a\u5143\u5206\u985e\u554f\u984c\u8f49\u63db\u70ba\u591a\u500b\u4e8c\u5143\u5206\u985e\u554f\u984c\u3002\u5e38\u898b\u65b9\u6cd5\u5305\u62ec one-vs-rest(OvR) \u548c many-vs-many(MvM) \u5169\u7a2e\u3002 one-vs-rest(OvR) \u5c07\u67d0\u500b\u985e\u5225\u7684\u6a23\u672c\u6b78\u70ba\u4e00\u985e\uff0c\u5176\u4ed6\u5269\u9918\u7684\u6a23\u672c\u6b78\u70ba\u53e6\u4e00\u985e many-vs-many(MvM) \u5728\u4efb\u610f\u5169\u985e\u6a23\u672c\u4e4b\u9593\u8a2d\u8a08\u4e00\u500b SVM \u8a73\u7d30\u4ecb\u7d39\u53ef\u4ee5\u53c3\u8003 [Day 9 \u908f\u8f2f\u8ff4\u6b78]","title":"\u591a\u5143\u5206\u985e\u652f\u6301\u5411\u91cf\u6a5f"},{"location":"11.SVM/#svr","text":"\u652f\u6301\u5411\u91cf\u6a5f\uff08SVM\uff09\u662f\u5c08\u9580\u8655\u7406\u5206\u985e\u7684\u554f\u984c\uff0c\u9084\u6709\u53e6\u4e00\u500b\u540d\u8a5e\u7a31\u70ba\u652f\u6301\u5411\u91cf\u8ff4\u6b78\uff08Support Vector Regression, SVR\uff09\u5c08\u9580\u8655\u7406\u8ff4\u6b78\u554f\u984c\u3002SVR \u662f SVM \u7684\u5ef6\u4f38\uff0c\u800c\u652f\u6301\u5411\u91cf\u8ff4\u6b78\u53ea\u8981 f(x) \u8207 y \u504f\u96e2\u7a0b\u5ea6\u4e0d\u8981\u592a\u5927\uff0c\u65e2\u53ef\u4ee5\u8a8d\u70ba\u9810\u6e2c\u6b63\u78ba\u3002\u5982\u4e0b\u5716\u4e2d\u7684\u8ff4\u6b78\u7bc4\u4f8b\uff0c\u5728\u7dda\u6027\u7684 SVR \u6a21\u578b\u4e2d\u6703\u5728\u5de6\u53f3\u52a0\u4e0a \ud835\udf00 
\u4f5c\u70ba\u6a21\u578b\u5bb9\u5fcd\u7684\u5340\u9593\u3002\u56e0\u6b64\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u53ea\u6709\u5728\u865b\u7dda\u4ee5\u5916\u7684\u8aa4\u5dee\u624d\u6703\u88ab\u8a08\u7b97\u3002\u6b64\u5916 SVR \u4e5f\u63d0\u4f9b\u4e86\u7dda\u6027\u8207\u975e\u7dda\u6027\u7684\u6838\u6280\u5de7\uff0c\u5176\u4e2d\u5728\u975e\u7dda\u6027\u7684\u6a21\u578b\u4e2d\u53ef\u4ee5\u4f7f\u7528\u9ad8\u6b21\u65b9\u8f49\u63db\u6216\u662f\u9ad8\u65af\u8f49\u63db\u3002","title":"SVR \u8ff4\u6b78\u5668"},{"location":"11.SVM/#_5","text":"","title":"[\u7a0b\u5f0f\u5be6\u4f5c]"},{"location":"11.SVM/#support-vector-machine-svm","text":"SVM \u80fd\u5920\u900f\u904e\u8d85\u53c3\u6578 C \u4f86\u9054\u5230 weight regularization \u4f86\u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\u3002\u9664\u4e86\u9019\u9ede\u6211\u5011\u9084\u80fd\u900f\u904e SVM \u7684 Kernel trick \u7684\u65b9\u5f0f\u5c07\u8cc7\u6599\u505a\u975e\u7dda\u6027\u8f49\u63db\uff0c\u5e38\u898b\u7684 kernel \u9664\u4e86 linear \u7dda\u6027\u4ee5\u5916\u9084\u6709\u5169\u4e86\u975e\u7dda\u6027\u7684 Polynomial \u9ad8\u6b21\u65b9\u8f49\u63db\u4ee5\u53ca Radial Basis Function \u9ad8\u65af\u8f49\u63db\u3002 \u56db\u7a2e\u4e0d\u540cSVC\u5206\u985e\u5668: 1. LinearSVC (\u7dda\u6027) 2. kernel='linear' (\u7dda\u6027) 3. kernel='poly' (\u975e\u7dda\u6027) 4. 
kernel='rbf' (\u975e\u7dda\u6027) Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002","title":"\u652f\u6301\u5411\u91cf\u6a5f (Support Vector Machine, SVM) \u6a21\u578b"},{"location":"11.SVM/#linearsvc","text":"Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - max_iter: \u6700\u5927\u8fed\u4ee3\u6b21\u6578\uff0c\u9810\u8a2d1000\u3002 from sklearn import svm # \u5efa\u7acb linearSvc \u6a21\u578b linearSvcModel = svm . LinearSVC ( C = 1 , max_iter = 10000 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b linearSvcModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = linearSvcModel . predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = linearSvcModel . score ( train_reduced , y_train ) \u8a13\u7df4\u96c6 Accuracy: 0.96","title":"LinearSVC"},{"location":"11.SVM/#kernellinear","text":"Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528\u7dda\u6027\u3002 from sklearn import svm # \u5efa\u7acb kernel='linear' \u6a21\u578b svcModel = svm . SVC ( kernel = 'linear' , C = 1 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b svcModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = svcModel . predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = svcModel . 
score ( train_reduced , y_train ) \u8a13\u7df4\u96c6 Accuracy: 0.97","title":"kernel='linear'"},{"location":"11.SVM/#kernelpoly","text":"Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528 Polynomial \u9ad8\u6b21\u65b9\u8f49\u63db\u3002 - degree: \u589e\u52a0\u6a21\u578b\u8907\u96dc\u5ea6\uff0c3 \u4ee3\u8868\u8f49\u63db\u5230\u4e09\u6b21\u7a7a\u9593\u9032\u884c\u5206\u985e\u3002 - gamma: \u6578\u503c\u8d8a\u5927\u8d8a\u80fd\u505a\u8907\u96dc\u7684\u5206\u985e\u908a\u754c\u3002 from sklearn import svm # \u5efa\u7acb kernel='poly' \u6a21\u578b polyModel = svm . SVC ( kernel = 'poly' , degree = 3 , gamma = 'auto' , C = 1 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b polyModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = polyModel . predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = polyModel . score ( train_reduced , y_train ) \u8a13\u7df4\u96c6 Accuracy: 0.97","title":"kernel='poly'"},{"location":"11.SVM/#kernelrbf","text":"Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528 Radial Basis Function \u9ad8\u65af\u8f49\u63db\u3002 - gamma: \u6578\u503c\u8d8a\u5927\u8d8a\u80fd\u505a\u8907\u96dc\u7684\u5206\u985e\u908a\u754c from sklearn import svm # \u5efa\u7acb kernel='rbf' \u6a21\u578b rbfModel = svm . SVC ( kernel = 'rbf' , gamma = 0.7 , C = 1 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b rbfModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = rbfModel . predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = rbfModel . 
score ( train_reduced , y_train ) \u8a13\u7df4\u96c6 Accuracy: 0.97 \u6211\u5011\u85c9\u7531\u5716\u5f62\u5316\u7684\u908a\u754c\uff0c\u4f86\u4e86\u89e3\u4f7f\u7528\u4e0d\u540c\u7684 Kernel \u53ca\u4e0d\u540c\u53c3\u6578\u7684\u610f\u7fa9\u3002\u4ee5\u4e0b\u7bc4\u4f8b\u5c07\u539f\u5148 \u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u56db\u500b\u7279\u5fb5\u900f\u904e PCA \u964d\u6210\u4e8c\u7dad\uff0c\u4ee5\u5229\u6211\u5011\u505a\u8996\u89ba\u5316\u89c0\u5bdf\u3002\u900f\u904e\u56db\u7a2e\u4e0d\u540c\u7684 SVC \u5be6\u9a57\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u4e0d\u540c\u7684\u6838\u6280\u5de7\u6240\u9810\u6e2c\u51fa\u4f86\u7684\u6c7a\u7b56\u908a\u7dda\u90fd\u4e0d\u76e1\u76f8\u540c\u3002\u7136\u800c\u8d8a\u8907\u96dc\u7684\u6a21\u578b\u76f8\u5c0d\u7684\u908a\u754c\u5c31\u6703\u8b8a\u5f97\u8d8a\u626d\u66f2\uff0c\u56e0\u70ba\u975e\u7dda\u6027\u7684\u6a21\u578b\u80fd\u5920\u6709\u6bd4\u8f03\u597d\u7684\u64ec\u5408\u4f7f\u5f97\u932f\u8aa4\u7387\u964d\u4f4e\u3002","title":"kernel='rbf'"},{"location":"11.SVM/#support-vector-regression-svr","text":"\u5728 Sklearn \u4e2d SVM \u63d0\u4f9b\u8ff4\u6b78\u7684\u6a21\u578b\u7a31\u4f5c SVR\u3002\u6b64\u5916 SVR \u8ff4\u6b78\u5668\u4e5f\u63d0\u4f9b\u4e86\u4e09\u7a2e\u4e0d\u540c\u7684\u6838\u51fd\u6578\uff0c\u5206\u5225\u6709\u4e00\u500b\u7dda\u6027\u4ee5\u53ca\u5169\u500b\u975e\u7dda\u6027\u7684\u6a21\u578b\u53ef\u4ee5\u547c\u53eb\u3002\u5728 SVR \u8ff4\u6b78\u7684\u5be6\u9a57\uff0c\u6211\u5011\u62ff\u4e00\u7d44\u975e\u7dda\u6027\u7684\u8cc7\u6599\u4f5c\u70ba\u4f8b\u5b50\u3002\u4e26\u67e5\u770b\u5728\u4e0d\u540c\u7684\u6838\u6280\u5de7\u4e0b\u6a21\u578b\u6240\u64ec\u5408\u7684\u6210\u6548\u70ba\u4f55\uff1f \u4e09\u7a2e\u4e0d\u540cSVR\u8ff4\u6b78\u5668: 1. kernel='linear' (\u7dda\u6027) 2. kernel='poly' (\u975e\u7dda\u6027) 3. 
kernel='rbf' (\u975e\u7dda\u6027) Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002","title":"\u652f\u6301\u5411\u91cf\u8ff4\u6b78\uff08Support Vector Regression, SVR\uff09 \u6a21\u578b"},{"location":"11.SVM/#kernellinear_1","text":"Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528\u7dda\u6027\u3002 from sklearn import svm # \u5efa\u7acb kernel='linear' \u6a21\u578b linearModel = svm . SVR ( C = 1 , kernel = 'linear' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b linearModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = linearModel . predict ( x_test ) \u8a13\u7df4\u96c6 MSE: 5.903802524650818","title":"kernel='linear'"},{"location":"11.SVM/#kernelpoly_1","text":"Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528 Polynomial \u9ad8\u6b21\u65b9\u8f49\u63db\u3002 - degree: \u589e\u52a0\u6a21\u578b\u8907\u96dc\u5ea6\uff0c3 \u4ee3\u8868\u8f49\u63db\u5230\u4e09\u6b21\u7a7a\u9593\u9032\u884c\u5206\u985e\u3002 - gamma: \u6578\u503c\u8d8a\u5927\u8d8a\u80fd\u505a\u8907\u96dc\u7684\u9810\u6e2c\u3002 from sklearn import svm # \u5efa\u7acb kernel='poly' \u6a21\u578b polyModel = svm . SVR ( C = 6 , kernel = 'poly' , degree = 3 , gamma = 'auto' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b polyModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = polyModel . 
predict ( x_test ) \u8a13\u7df4\u96c6 MSE: 8.296270605383441","title":"kernel='poly'"},{"location":"11.SVM/#kernelrbf_1","text":"Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528 Radial Basis Function \u9ad8\u65af\u8f49\u63db\u3002 - gamma: \u6578\u503c\u8d8a\u5927\u8d8a\u80fd\u505a\u8907\u96dc\u7684\u5206\u985e\u908a\u754c\u3002 from sklearn import svm # \u5efa\u7acb kernel='rbf' \u6a21\u578b rbfModel = svm . SVR ( C = 6 , kernel = 'rbf' , gamma = 'auto' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b rbfModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = rbfModel . predict ( x_test ) \u8a13\u7df4\u96c6 MSE: 2.2551572190243157 \u9019\u88e1\u7684\u8ff4\u6b78\u6a21\u578b\u63a1\u7528\u975e\u7dda\u6027\u7684\u8cc7\u6599\u9032\u884c\u6578\u64da\u64ec\u5408\u7684\u5be6\u9a57\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7dda\u6027\u7684\u6838\u51fd\u6578\u7121\u6cd5\u6709\u6548\u7684\u9810\u6e2c\u6240\u6709\u6578\u64da\u9ede\u7684\u8da8\u52e2\u3002\u800c\u975e\u7dda\u6027\u7684\u6a21\u578b\u4e2d RBF \u7684\u6a21\u578b\u5c0d\u65bc\u6b64\u8cc7\u6599\u6709\u6bd4\u8f03\u597d\u7684\u9810\u6e2c\u7d50\u679c\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"kernel='rbf'"},{"location":"12.\u6c7a\u7b56\u6a39/","text":"[Day 12] \u6c7a\u7b56\u6a39 (Decision tree) \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u4ecb\u7d39 \u6c7a\u7b56\u6a39\u5982\u4f55\u751f\u6210\uff1f \u5982\u4f55\u8655\u7406\u5206\u985e\u554f\u984c\uff1f \u5982\u4f55\u8655\u7406\u8ff4\u6b78\u554f\u984c\uff1f \u5be6\u4f5c\u6c7a\u7b56\u6a39\u5206\u985e\u5668 \u89c0\u5bdf\u6c7a\u7b56\u6a39\u662f\u5982\u4f55\u751f\u6210\u7684\u3002 \u5be6\u4f5c\u6c7a\u7b56\u6a39\u8ff4\u6b78\u5668 
\u67e5\u770b\u6c7a\u7b56\u6a39\u65b9\u6cd5\u5728\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u548c\u975e\u7dda\u6027\u8ff4\u6b78\u8868\u73fe\u3002 \u7bc4\u4f8b\u7a0b\u5f0f \u6c7a\u7b56\u6a39(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f \u6c7a\u7b56\u6a39(Regression)\uff1a \u6c7a\u7b56\u6a39 \u6c7a\u7b56\u6a39\u6703\u6839\u64da\u8a13\u7df4\u8cc7\u6599\u7522\u751f\u4e00\u68f5\u6a39\uff0c\u4f9d\u64da\u8a13\u7df4\u51fa\u4f86\u7684\u898f\u5247\u4f86\u5c0d\u65b0\u6a23\u672c\u9032\u884c\u9810\u6e2c\u3002\u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u53ef\u4ee5\u4f7f\u7528\u4e0d\u540c\u7684\u65b9\u5f0f\u4f86\u8a55\u4f30\u5206\u679d\u7684\u597d\u58de(\u4e82\u5ea6)\uff0c\u4f8b\u5982\u50cf\u662f Information gain\u3001Gain ratio\u3001Gini index\u3002\u4f9d\u64da\u8a13\u7df4\u8cc7\u6599\u627e\u51fa\u5408\u9069\u7684\u898f\u5247\uff0c\u6700\u7d42\u751f\u6210\u4e00\u500b\u898f\u5247\u6a39\u4f86\u6c7a\u7b56\u6240\u6709\u4e8b\u60c5\uff0c\u5176\u76ee\u7684\u4f7f\u6bcf\u4e00\u500b\u6c7a\u7b56\u80fd\u5920\u4f7f\u8a0a\u606f\u589e\u76ca\u6700\u5927\u5316\u3002\u5c31\u597d\u6bd4\u6211\u5011\u8a55\u4f30\u4eca\u5929\u6bd4\u8cfd\u662f\u5426\u8209\u884c\uff0c\u5929\u6c23\u56e0\u5b50\u53ef\u80fd\u7ad9\u6bd4\u8f03\u5927\u7684\u56e0\u7d20\uff0c\u800c Co2 \u7684\u6fc3\u5ea6\u9ad8\u4f4e\u53ef\u80fd\u4f54\u7684\u56e0\u5b50\u7a0b\u5ea6\u8f03\u4f4e\u3002\u56e0\u6b64\u5728\u7b2c\u4e00\u5c64\u7684\u6c7a\u7b56\u4e2d\u4ee5\u5929\u6c23\u7684\u7279\u5fb5\u5148\u9032\u884c\u7b2c\u4e00\u6b21\u7684\u6c7a\u7b56\u5224\u65b7\u3002\u63a5\u8457\u7b2c\u4e8c\u5c64\u518d\u5f9e\u6240\u6709\u7279\u5fb5\u4e2d\u5c0b\u627e\u6700\u9069\u5408\u7684\u6c7a\u7b56\u56e0\u5b50\uff0c\u76f4\u5230\u8a2d\u5b9a\u7684\u6700\u5927\u6a39\u7684\u6df1\u5ea6\u5373\u505c\u6b62\u6a39\u7684\u751f\u9577\u3002 \u6c7a\u7b56\u6a39\u5982\u4f55\u751f\u6210\uff1f 
\u6c7a\u7b56\u6a39\u662f\u4ee5\u4e00\u500b\u8caa\u5a6a\u6cd5\u5247\u4f86\u6c7a\u5b9a\u6bcf\u4e00\u5c64\u8981\u554f\u4ec0\u9ebc\u554f\u984c\uff0c\u76ee\u6a19\u662f\u5206\u985e\u904e\u5f8c\u6bcf\u4e00\u7fa4\u80fd\u5920\u5f88\u660e\u986f\u7684\u77e5\u9053\u662f\u5c6c\u65bc\u54ea\u4e00\u7a2e\u985e\u5225\u3002\u5ef6\u7e8c\u4e0a\u9762\u7684\u4f8b\u5b50\uff0c\u4ee5\u5206\u985e\u554f\u984c\u4f86\u8aaa\u5047\u8a2d\u8981\u8a55\u4f30\u660e\u5929\u6bd4\u8cfd\u662f\u5426\u8209\u884c\u3002\u5728\u6a39\u7684\u7b2c\u4e00\u5c64\u7bc0\u9ede\u4e2d\u6211\u5011\u8981\u5f9e\u5df2\u77e5\u7684\u5169\u500b\u7279\u5fb5\u5206\u5225\u662f\u6eab\u5ea6\u8207\u5929\u6c23\u9078\u4e00\u500b\u4f5c\u70ba\u8a72\u5c64\u7684\u6c7a\u7b56\u56e0\u5b50\u3002\u5047\u8a2d\u76ee\u524d\u8a13\u7df4\u96c6\u6709\u4e94\u7b46\u8cc7\u6599\uff0c\u5176\u4e2d\u6b63\u5e38\u8209\u884c\u7684\u6709\u5169\u7b46\u8cc7\u6599\uff0c\u53d6\u6d88\u8209\u884c\u7684\u6709\u4e09\u7b46\u8cc7\u6599\u3002\u5728\u6a39\u7684\u7d50\u69cb\u4e2d\u5de6\u5b50\u6a39\u70ba\u6c7a\u7b56\u6b63\u5e38\u8209\u884c\uff0c\u800c\u53f3\u5b50\u6a39\u662f\u6c7a\u7b56\u53d6\u6d88\u8209\u884c\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7576\u7279\u5fb5\u70ba\u5929\u6c23\u7684\u6642\u5019\u53ef\u4ee5\u5f88\u6e05\u695a\u7684\u5c07\u9019\u5169\u985e\u5225\u5b8c\u6574\u5206\u958b\uff0c\u56e0\u6b64\u6211\u5011\u6703\u5c07\u5929\u6c23\u4f5c\u70ba\u9019\u4e00\u5c64\u5224\u65b7\u7684\u56e0\u5b50\u3002\u9019\u5c31\u662f\u6c7a\u7b56\u6a39\u5728\u751f\u6210\u4e2d\u7684\u8caa\u5a6a\u6a5f\u5236\u3002\u7136\u800c\u8981\u5982\u4f55\u53bb\u5224\u65b7\u6bcf\u6b21\u6c7a\u7b56\u7684\u597d\u58de\uff0c\u5c31\u5fc5\u9808\u4f9d\u9760\u4e82\u5ea6\u7684\u8a55\u4f30\u6307\u6a19\u3002 \u6c7a\u7b56\u6a39\u7684\u6df7\u4e82\u8a55\u4f30\u6307\u6a19 
\u6211\u5011\u9700\u8981\u5ba2\u89c0\u7684\u6a19\u6e96\u4f86\u6c7a\u5b9a\u6c7a\u7b56\u6a39\u7684\u6bcf\u500b\u5206\u652f\uff0c\u56e0\u6b64\u6211\u5011\u9700\u8981\u6709\u4e00\u500b\u8a55\u65b7\u7684\u6307\u6a19\u4f86\u5354\u52a9\u6211\u5011\u6c7a\u7b56\u3002\u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u53ef\u4ee5\u4f7f\u7528\u4e0d\u540c\u7684\u6307\u6a19\u4f86\u8a55\u4f30\u5206\u679d\u7684\u597d\u58de\uff0c\u5e38\u898b\u7684\u6c7a\u7b56\u4e82\u5ea6\u8a55\u4f30\u6307\u6a19\u6709 Information gain\u3001Gain ratio\u3001Gini index\u3002\u6211\u5011\u76ee\u6a19\u662f\u5f9e\u8a13\u7df4\u8cc7\u6599\u4e2d\u627e\u51fa\u4e00\u5957\u6c7a\u7b56\u898f\u5247\uff0c\u8b93\u6bcf\u4e00\u500b\u6c7a\u7b56\u80fd\u5920\u4f7f\u8a0a\u606f\u589e\u76ca\u6700\u5927\u5316\u3002\u4ee5\u4e0a\u7684\u6307\u6a19\u90fd\u662f\u5728\u8861\u91cf\u4e00\u500b\u5e8f\u5217\u4e2d\u7684\u6df7\u4e82\u7a0b\u5ea6\uff0c\u5176\u6578\u503c\u8d8a\u9ad8\u4ee3\u8868\u8d8a\u6df7\u4e82\u3002\u7136\u800c\u5728 Sklearn \u5957\u4ef6\u4e2d\u9810\u8a2d\u4f7f\u7528 Gini\u3002 Information gain (\u8cc7\u8a0a\u7372\u5229) Gain ratio (\u7372\u5229\u6bd4\u7387) Gini index (\u5409\u5c3c\u4fc2\u6578) = Gini Impurity (\u5409\u5c3c\u4e0d\u7d14\u5ea6) \u8a55\u4f30\u5206\u5272\u8cc7\u8a0a\u91cf Information Gain \u900f\u904e\u5f9e\u8a13\u7df4\u8cc7\u6599\u627e\u51fa\u898f\u5247\uff0c\u8b93\u6bcf\u4e00\u500b\u6c7a\u7b56\u80fd\u5920\u4f7f\u8a0a\u606f\u589e\u76ca\u6700\u5927\u5316\u3002\u5176\u7b97\u6cd5\u4e3b\u8981\u662f\u8a08\u7b97\u71b5\uff0c\u56e0\u6b64\u7d93\u7531\u6c7a\u7b56\u6a39\u5206\u5272\u5f8c\u7684\u8cc7\u8a0a\u91cf\u8981\u8d8a\u5c0f\u8d8a\u597d\u3002\u800c Gini \u7684\u6578\u503c\u8d8a\u5927\u4ee3\u8868\u5e8f\u5217\u4e2d\u7684\u8cc7\u6599\u8d8a\u4e82\uff0c\u6578\u503c\u7686\u70ba 0~1 \u4e4b\u9593\uff0c\u5176\u4e2d 0 \u4ee3\u8868\u8a72\u7279\u5fb5\u5728\u5e8f\u5217\u4e2d\u662f\u5b8c\u7f8e\u7684\u5206\u985e\u3002\u5e38\u898b\u7684\u8cc7\u8a0a\u91cf\u8a55\u4f30\u65b9\u6cd5\u6709\u5169\u7a2e\uff1a\u8cc7\u8a0a\u7372\u5229 (Information Gain) 
\u4ee5\u53ca Gini \u4e0d\u7d14\u5ea6 (Gini Impurity)\u3002 \u71b5 (Entropy) \u71b5 (Entropy) \u662f\u8a08\u7b97 Information Gain \u7684\u4e00\u7a2e\u65b9\u6cd5\u3002\u5728\u4e86\u89e3 Information Gain \u4e4b\u524d\u8981\u5148\u4e86\u89e3\u71b5\u662f\u5982\u4f55\u88ab\u8a08\u7b97\u51fa\u4f86\u7684\u3002\u5176\u4e2d\u5728\u4e0b\u5716\u516c\u5f0f\u4e2d p \u4ee3\u8868\u662f\u7684\u6a5f\u7387\u3001q \u4ee3\u8868\u5426\u7684\u6a5f\u7387\u3002\u6211\u5011\u53ef\u4ee5\u5f9e\u5716\u4e2d\u7bc4\u4f8b\u5f88\u6e05\u695a\u5730\u77e5\u9053\u7576\u6240\u6709\u7684\u8cc7\u6599\u90fd\u88ab\u5206\u985e\u4e00\u81f4\u7684\u6642\u5019 Entropy \u5373\u70ba 0\uff0c\u7576\u8cc7\u6599\u5404\u6709\u4e00\u534a\u4e0d\u540c\u6642 Entropy \u5373\u70ba 1\u3002 Gini \u4e0d\u7d14\u5ea6 (Gini Impurity) Gini \u4e0d\u7d14\u5ea6\u662f\u53e6\u4e00\u7a2e\u4e82\u5ea6\u7684\u8861\u91cf\u65b9\u5f0f\uff0c\u5b83\u7684\u6578\u5b57\u8d8a\u5927\u4ee3\u8868\u5e8f\u5217\u4e2d\u7684\u8cc7\u6599\u8d8a\u6df7\u4e82\u3002\u516c\u5f0f\u5982\u4e0b\u6240\u793a\uff0c\u5176\u4e2d p \u4ee3\u8868\u662f\u7684\u6a5f\u7387\u3001q \u70ba\u4ee3\u8868\u5426\u7684\u6a5f\u7387\u3002\u6211\u5011\u53ef\u4ee5\u5f9e\u5716\u4e2d\u7bc4\u4f8b\u5f88\u6e05\u695a\u5730\u77e5\u9053\u7576\u6240\u6709\u7684\u8cc7\u6599\u90fd\u88ab\u5206\u985e\u4e00\u81f4\u7684\u6642\u5019\u6df7\u4e82\u7a0b\u5ea6\u5373\u70ba 0\uff0c\u7576\u8cc7\u6599\u5404\u6709\u4e00\u534a\u4e0d\u540c\u6642\u6df7\u4e82\u7a0b\u5ea6\u5373\u70ba 0.5\u3002 \u8ff4\u6b78\u6a39 
\u6c7a\u7b56\u6a39\u8ff4\u6b78\u65b9\u6cd5\u8207\u5206\u985e\u6709\u9ede\u985e\u4f3c\u5dee\u5225\u50c5\u5728\u65bc\u8a55\u4f30\u5206\u679d\u597d\u58de\u7684\u65b9\u5f0f\u4e0d\u540c\uff0c\u6211\u5011\u53c8\u53ef\u4ee5\u7a31\u4f5c\u8ff4\u6b78\u6a39\u3002\u7576\u6578\u64da\u96c6\u7684\u8f38\u51fa\u7232\u9023\u7e8c\u6027\u6578\u503c\u6642\uff0c\u8a72\u6a39\u7b97\u6cd5\u5c31\u662f\u4e00\u500b\u8ff4\u6b78\u6a39\u3002\u900f\u904e\u6a39\u7684\u5c55\u958b\uff0c\u4e26\u7528\u8449\u7bc0\u9ede\u7684\u5747\u503c\u4f5c\u7232\u9810\u6e2c\u503c\u3002\u5f9e\u6839\u7bc0\u9ede\u958b\u59cb\uff0c\u5c0d\u6a23\u672c\u7684\u67d0\u4e00\u7279\u5fb5\u9032\u884c\u6e2c\u8a66\u3002\u7d93\u904e\u8a55\u4f30\u5f8c\uff0c\u5c07\u6a23\u672c\u5206\u914d\u5230\u5176\u5b50\u7d50\u9ede\u3002\u6b64\u6642\u6bcf\u4e00\u500b\u5b50\u7bc0\u9ede\u5c0d\u61c9\u8457\u8a72\u7279\u5fb5\u7684\u4e00\u500b\u503c\u3002\u4f9d\u7167\u9019\u6a23\u65b9\u5f0f\u9032\u884c\uff0c\u76f4\u81f3\u5230\u9054\u8449\u7d50\u9ede\u3002\u6b64\u6642\u8aa4\u5dee\u503c\u8981\u6700\u5c0f\u5316\uff0c\u4e26\u4e14\u8d8a\u63a5\u8fd1\u96f6\u8d8a\u597d\u3002 \u8ff4\u6b78\u6a39\u7684\u751f\u9577\u904e\u7a0b\u5f88\u63a8\u85a6\u770b \u9019\u7bc7 \u6587\u7ae0 \u4ee5\u4e0b\u8209\u4e00\u500b\u4f8b\u5b50\u5047\u8a2d x \u662f\u8f38\u5165 y \u662f\u8f38\u51fa\uff0c\u6211\u5011\u53ef\u4ee5\u5728\u4e00\u500b\u5e73\u9762\u4e0a\u7e6a\u88fd\u51fa\u8cc7\u6599\u8207\u6b63\u78ba\u7b54\u6848\u9593\u7684\u5206\u4f48\u3002\u5047\u8a2d\u8ff4\u6b78\u6a39\u7684\u6700\u5927\u6df1\u5ea6\u8a2d\u5b9a\u5169\u5c64\u3002\u9996\u5148\u5728\u7b2c\u4e00\u5c64\u4e2d\u6703\u5c07\u6240\u6709\u7684\u8cc7\u6599\u5f9e\u4e2d\u9593\u5207\u4e00\u5200\u6b64\u65b7\u9ede\u70ba x=0.496 \u7576\u5927\u65bc\u8a2d\u5b9a\u7684\u503c\u7684\u6578\u64da\u9ede\u6703\u7e7c\u7e8c\u5f80\u53f3\u5b50\u6a39\u4e0b\u53bb\u5ef6\u4f38\uff0c\u53cd\u4e4b\u5c0f\u65bc 0.496 
\u7684\u8cc7\u6599\u9ede\u6703\u5f80\u5de6\u5b50\u6a39\u8d70\u3002\u6b64\u6642\u5c07\u6703\u5207\u51fa\u4e00\u500b\u5206\u652f\u51fa\u4f86\u4e26\u5f80\u4e0b\u64f4\u5c55\u4e26\u5f62\u6210\u7b2c\u4e8c\u5c64\u7684\u6c7a\u7b56\u5206\u652f\u3002\u4e00\u76f4\u4e0d\u65b7\u6301\u7e8c\u62d3\u5c55\u76f4\u5230\u8a2d\u5b9a\u7684\u6700\u5927\u6df1\u5ea6\u7d42\u6b62\uff0c\u6b64\u6642\u7684\u7bc0\u9ede\u5373\u70ba\u8449\u7bc0\u9ede\u4e5f\u5c31\u662f\u6700\u7d42\u7684\u6a21\u578b\u8f38\u51fa\u503c\u3002 \u6a39\u8d8a\u6df1\u6a21\u578b\u8d8a\u8907\u96dc \u5047\u8a2d\u6211\u5011\u751f\u6210\u4e00\u500b f(x) = 3x+15 + noise \u7684\u8cc7\u6599\uff0c\u5176\u4e2d noise \u70ba\u4e00\u500b 0~1 \u4e4b\u9593\u7684\u96a8\u6a5f\u6578\u3002\u5f9e\u4ee5\u4e0b\u7684\u6e2c\u8a66\u53ef\u4ee5\u770b\u51fa\u96a8\u8457\u6c7a\u7b56\u6a39\u6df1\u5ea6\u7684\u589e\u52a0\uff0c\u6c7a\u7b56\u6a39\u7684\u64ec\u5408\u80fd\u529b\u4e0d\u65b7\u4e0a\u5347\u3002\u6c7a\u7b56\u6a39\u5df2\u7d93\u4e0d\u50c5\u50c5\u64ec\u5408\u4e86\u6211\u5011\u7684\u7dda\u6027\u51fd\u5f0f 3x+15 \uff0c\u540c\u6642\u4e5f\u64ec\u5408\u4e86\u6211\u5011\u6dfb\u52a0\u7684\u566a\u97f3(noise)\u3002 \u8ff4\u6b78\u6a39\u8a72\u5982\u4f55\u9078\u64c7\u5207\u5272\u9ede? 
\u5728\u5206\u985e\u6a21\u578b\u4e2d\u6c7a\u7b56\u6a39\u662f\u4ee5\u4e82\u5ea6\u4f5c\u70ba\u6c7a\u7b56\u6a39\u751f\u6210\u6642\u5019\u7684\u8a55\u4f30\u6307\u6a19\u3002\u4f46\u662f\u8ff4\u6b78\u6a39\u900f\u904e\u662f MSE \u6216 MAE \u4f86\u8a55\u4f30\u6a21\u578b\uff0c\u4e26\u627e\u51fa\u8aa4\u5dee\u6700\u5c0f\u7684\u503c\u4f5c\u70ba\u6a39\u7684\u7279\u5fb5\u9078\u64c7\u8207\u5207\u5272\u9ede\u3002\u5176\u4e2d\u524d\u8005\u662f\u5747\u65b9\u5dee\uff0c\u5f8c\u8005\u662f\u548c\u5747\u503c\u4e4b\u5dee\u7684\u7d55\u5c0d\u503c\u4e4b\u548c\u3002 CART \u6c7a\u7b56\u6a39 \u5728 Sklearn \u5957\u4ef6\u4e2d\u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u662f\u63a1\u7528 CART (Classification and Regression Tree) \u6f14\u7b97\u6cd5\uff0c\u4e26\u4e14\u53ef\u4ee5\u88ab\u62ff\u4f86\u505a\u5206\u985e\u548c\u8ff4\u6b78\u7684\u9810\u6e2c\u3002\u5728\u6c7a\u7b56\u6a39\u7684\u6bcf\u4e00\u500b\u7bc0\u9ede\u4e0a\u90fd\u662f\u63a1\u7528\u4e8c\u5206\u6cd5\uff0c\u4e5f\u5c31\u662f\u6bcf\u4e00\u500b\u6c7a\u7b56\u7bc0\u9ede\u53ea\u5206\u679d\u51fa\u5169\u500b\u5b50\u7bc0\u9ede\u3002\u4e26\u4e14\u4e0d\u65b7\u5730\u5f80\u4e0b\u62d3\u5c55\uff0c\u76f4\u5230\u8a2d\u5b9a\u7684\u6700\u5927\u6df1\u5ea6\u70ba\u6b62\uff0c\u6b64\u6642\u6700\u5927\u6df1\u5ea6\u7684\u7bc0\u9ede\u7a31\u70ba\u8449\u7bc0\u9ede\u5373\u70ba\u6a21\u578b\u7684\u9810\u6e2c\u8f38\u51fa\u3002 \u6c7a\u7b56\u6a39\u6a21\u578b\u7684\u512a\u7f3a\u9ede 
\u5efa\u7acb\u6c7a\u7b56\u6a39\u7684\u904e\u7a0b\u5c31\u662f\u4e0d\u65b7\u7684\u5c0b\u627e\u7279\u5fb5\u9032\u884c\u6c7a\u7b56\uff0c\u900f\u904e\u9019\u4e9b\u6c7a\u7b56\u76e1\u91cf\u7684\u4f7f\u9019\u4e9b\u8cc7\u6599\u88ab\u5206\u70ba\u540c\u4e00\u500b\u985e\u5225\uff0c\u4e14\u8a66\u8457\u8b93\u6df7\u4e82\u7a0b\u5ea6\u8d8a\u5c0f\u8d8a\u597d\u3002\u5207\u8a18\u6a39\u7684\u6df1\u5ea6\u8d8a\u6df1\u4e0d\u4e00\u5b9a\u8d8a\u597d\uff0c\u4ed6\u53ef\u80fd\u6703\u9020\u6210\u904e\u5ea6\u64ec\u5408\u7684\u554f\u984c\u3002\u8a13\u7df4\u597d\u7684\u6a21\u578b\u6211\u5011\u80fd\u5920\u8996\u89ba\u5316\u6c7a\u7b56\u6a39\u7684\u7d50\u69cb\uff0c\u76f8\u5c0d\u7684\u53ef\u89e3\u91cb\u6027\u5c31\u8b8a\u9ad8\u3002\u6b64\u5916\u8207\u5176\u5b83\u7684ML\u6a21\u578b\u6bd4\u8f03\u8d77\u4f86\uff0c\u6c7a\u7b56\u6a39\u57f7\u884c\u901f\u5ea6\u662f\u5b83\u7684\u4e00\u5927\u512a\u52e2\u3002\u56e0\u70ba\u662f\u6a39\u72c0\u7d50\u69cb\uff0c\u56e0\u6b64\u5728\u9032\u884c\u6a5f\u5668\u5b78\u7fd2\u7684\u6642\u5019\u6bcf\u500b\u6c7a\u7b56\u968e\u6bb5\u90fd\u76f8\u7576\u7684\u660e\u78ba\u6e05\u695a\uff0c\u4e0d\u662f 0 \u5c31\u662f 1\u3002 \u6c7a\u7b56\u6a39\u7e3d\u7d50 \u6c7a\u7b56\u6a39\u900f\u904e\u6240\u6709\u7279\u5fb5\u8207\u5c0d\u61c9\u7684\u503c\u5c07\u8cc7\u6599\u5207\u5206\uff0c\u4f86\u627e\u51fa\u6700\u9069\u5408\u7684\u5206\u679d\u4e26\u7e7c\u7e8c\u5f80\u4e0b\u62d3\u5c55\u3002\u82e5\u6c7a\u7b56\u6a39\u6df1\u5ea6\u8d8a\u6df1\u5247\u6c7a\u7b56\u7684\u898f\u5247\u5c07\u8d8a\u8907\u96dc\uff0c\u6a21\u578b\u9810\u6e2c\u4e5f\u6703\u8d8a\u63a5\u8fd1\u771f\u5be6\u7b54\u6848\u3002\u4f46\u82e5\u8a13\u7df4\u96c6\u4e2d\u542b\u6709\u904e\u591a\u7684\u96dc\u8a0a\uff0c\u592a\u6df1\u7684\u6a39\u5c31\u6709\u53ef\u80fd\u7522\u751f\u904e\u64ec\u5408\u7684\u60c5\u5f62\u3002\u56e0\u6b64\u55ae\u4e00\u7684\u6c7a\u7b56\u6a39\u80af\u5b9a\u662f\u4e0d\u5920\u7528\u7684\uff0c\u6211\u5011\u53ef\u4ee5\u5229\u7528\u96c6\u6210\u5b78\u7fd2\u4e2d\u7684 Boosting 
\u67b6\u69cb\uff0c\u5c0d\u8ff4\u6b78\u6a39\u9032\u884c\u6539\u826f\u5347\u7d1a\u3002 [\u7a0b\u5f0f\u5be6\u4f5c] \u5206\u985e\u6c7a\u7b56\u6a39 \u4e00\u500b\u6c7a\u7b56\u6a39\u6703\u6839\u64da\u8a13\u7df4\u8cc7\u6599\u81ea\u52d5\u7522\u751f\u4e00\u68f5\u6a39\u3002\u6c7a\u7b56\u6a39\u6703\u6839\u64da\u8cc7\u6599\u7522\u751f\u5f88\u591a\u6a39\u72c0\u7684\u898f\u5247\uff0c\u6700\u7d42\u8a13\u7df4\u51fa\u4f86\u7684\u898f\u5247\u6703\u5c0d\u65b0\u6a23\u672c\u9032\u884c\u9810\u6e2c\u3002 Parameters: - criterion: \u4e82\u5ea6\u7684\u8a55\u4f30\u6a19\u6e96\uff0cgini/entropy\u3002\u9810\u8a2d\u70bagini\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\u3002 - splitter: \u7279\u5fb5\u5283\u5206\u9ede\u9078\u64c7\u6a19\u6e96\uff0cbest/random\u3002\u9810\u8a2d\u70babest\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\uff0csplitter=random \u624d\u6709\u7528\u3002 - min_samples_split: \u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u518d\u5206 - min_samples_leaf: \u5206\u5b8c\u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u5206 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 - get_depth: \u53d6\u5f97\u6a39\u7684\u6df1\u5ea6\u3002 from sklearn.tree import DecisionTreeClassifier # \u5efa\u7acb DecisionTreeClassifier \u6a21\u578b decisionTreeModel = DecisionTreeClassifier ( criterion = 'entropy' , max_depth = 6 , random_state = 42 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b decisionTreeModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = decisionTreeModel . 
predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = decisionTreeModel . score ( train_reduced , y_train ) \u6211\u5011\u900f\u904e\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u9032\u884c PCA \u964d\u7dad\u4e26\u8a13\u7df4\u4e00\u500b\u6c7a\u7b56\u6a39\u6a21\u578b\u3002\u900f\u904e\u7e6a\u88fd\u8a13\u7df4\u6c7a\u7b56\u908a\u754c\u53ef\u4ee5\u770b\u5230\uff0c\u5728\u4e0b\u5716\u5de6\u624b\u908a\u7684\u8a13\u7df4\u96c6\u5b8c\u6574\u5730\u5c07\u4e09\u500b\u985e\u5225\u5207\u5272\u958b\u4f86\u3002\u800c\u5728\u53f3\u908a\u7684\u6e2c\u8a66\u96c6\u4e2d\u50c5\u6709\u4e00\u7b46\u7d05\u8272\u6846\u8d77\u4f86\u7684\u8cc7\u6599\u9810\u6e2c\u932f\u8aa4\u3002 \u8ff4\u6b78\u6c7a\u7b56\u6a39 Parameters: - criterion: \u8a55\u4f30\u5207\u5272\u9ede\u6307\u6a19\uff0cmse/friedman_mse/mae\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\u3002 - splitter: \u7279\u5fb5\u5283\u5206\u9ede\u9078\u64c7\u6a19\u6e96\uff0cbest/random\u3002\u9810\u8a2d\u70babest\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\uff0csplitter=random \u624d\u6709\u7528\u3002 - min_samples_split: \u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u518d\u5206 - min_samples_leaf: \u5206\u5b8c\u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u5206 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - get_depth: \u53d6\u5f97\u6a39\u7684\u6df1\u5ea6\u3002 from sklearn.tree import DecisionTreeRegressor # \u5efa\u7acb DecisionTreeRegressor \u6a21\u578b decisionTreeModel = DecisionTreeRegressor ( criterion = 'mse' , max_depth = 4 , splitter = 'best' , random_state = 42 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b decisionTreeModel . 
fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = decisionTreeModel . predict ( x ) \u5728\u8ff4\u6b78\u6c7a\u7b56\u6a39\u4e2d\u6211\u5011\u4f7f\u7528\u4e86\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u8207\u975e\u7dda\u6027\u8ff4\u6b78\u5169\u7a2e\u8cc7\u6599\u96c6\u9032\u884c\u6578\u64da\u64ec\u5408\u5be6\u9a57\u3002\u5728\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u4e2d\u6211\u5011\u5c07\u6578\u64da\u9ede\u6dfb\u52a0\u4e00\u4e9b\u566a\u97f3\u8b93\u8cc7\u6599\u5206\u5e03\u5728\u659c\u76f4\u7dda\u4e0a\u3002\u5de6\u5716\u662f\u8ff4\u6b78\u6a39\u5728\u6700\u5927\u6df1\u5ea6\u70ba 4 \u7684\u8a13\u7df4\u7d50\u679c\uff0c\u53ef\u4ee5\u96b1\u7d04\u5730\u770b\u5230\u6a21\u578b\u6c7a\u7b56\u7684\u65b9\u5f0f\u5448\u73fe\u968e\u68af\u72c0\u614b\u3002\u5982\u679c\u6211\u5011\u5617\u8a66\u7684\u5c07\u6a39\u7684\u6df1\u5ea6\u589e\u52a0\uff0c\u6a21\u578b\u76f8\u5c0d\u8907\u96dc\u56e0\u6b64\u53ef\u4ee5\u64ec\u5408\u5f97\u66f4\u597d\u3002\u800c\u53f3\u908a\u662f\u900f\u904e\u96a8\u6a5f\u7522\u751f\u7684\u975e\u7dda\u6027\u8cc7\u6599\u9032\u884c\u6a21\u578b\u8a13\u7df4\u3002\u5f9e\u8a13\u7df4\u7d50\u679c\u53ef\u4ee5\u767c\u73fe\u5728\u6700\u5927\u6df1\u5ea6\u70ba 4 \u7684\u6642\u5019\uff0c\u8a13\u7df4\u7d50\u679c\u5c31\u9084\u4e0d\u932f\u4e86\u3002\u5927\u5bb6\u53ef\u4ee5\u8a66\u770b\u770b\u8abf\u6574\u6a21\u578b\u7684\u6a39\u6700\u5927\u6df1\u5ea6\u4ee5\u53ca\u5176\u4ed6\u7684\u8d85\u53c3\u6578\u5c0d\u6a21\u578b\u8a13\u7df4\u7d50\u679c\u7684\u5f71\u97ff\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 12] \u6c7a\u7b56\u6a39 (Decision tree)"},{"location":"12.\u6c7a\u7b56\u6a39/#day-12-decision-tree","text":"","title":"[Day 12] \u6c7a\u7b56\u6a39 (Decision tree)"},{"location":"12.\u6c7a\u7b56\u6a39/#_1","text":"\u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u4ecb\u7d39 \u6c7a\u7b56\u6a39\u5982\u4f55\u751f\u6210\uff1f \u5982\u4f55\u8655\u7406\u5206\u985e\u554f\u984c\uff1f 
\u5982\u4f55\u8655\u7406\u8ff4\u6b78\u554f\u984c\uff1f \u5be6\u4f5c\u6c7a\u7b56\u6a39\u5206\u985e\u5668 \u89c0\u5bdf\u6c7a\u7b56\u6a39\u662f\u5982\u4f55\u751f\u6210\u7684\u3002 \u5be6\u4f5c\u6c7a\u7b56\u6a39\u8ff4\u6b78\u5668 \u67e5\u770b\u6c7a\u7b56\u6a39\u65b9\u6cd5\u5728\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u548c\u975e\u7dda\u6027\u8ff4\u6b78\u8868\u73fe\u3002 \u7bc4\u4f8b\u7a0b\u5f0f \u6c7a\u7b56\u6a39(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f \u6c7a\u7b56\u6a39(Regression)\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"12.\u6c7a\u7b56\u6a39/#_2","text":"\u6c7a\u7b56\u6a39\u6703\u6839\u64da\u8a13\u7df4\u8cc7\u6599\u7522\u751f\u4e00\u68f5\u6a39\uff0c\u4f9d\u64da\u8a13\u7df4\u51fa\u4f86\u7684\u898f\u5247\u4f86\u5c0d\u65b0\u6a23\u672c\u9032\u884c\u9810\u6e2c\u3002\u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u53ef\u4ee5\u4f7f\u7528\u4e0d\u540c\u7684\u65b9\u5f0f\u4f86\u8a55\u4f30\u5206\u679d\u7684\u597d\u58de(\u4e82\u5ea6)\uff0c\u4f8b\u5982\u50cf\u662f Information gain\u3001Gain ratio\u3001Gini index\u3002\u4f9d\u64da\u8a13\u7df4\u8cc7\u6599\u627e\u51fa\u5408\u9069\u7684\u898f\u5247\uff0c\u6700\u7d42\u751f\u6210\u4e00\u500b\u898f\u5247\u6a39\u4f86\u6c7a\u7b56\u6240\u6709\u4e8b\u60c5\uff0c\u5176\u76ee\u7684\u4f7f\u6bcf\u4e00\u500b\u6c7a\u7b56\u80fd\u5920\u4f7f\u8a0a\u606f\u589e\u76ca\u6700\u5927\u5316\u3002\u5c31\u597d\u6bd4\u6211\u5011\u8a55\u4f30\u4eca\u5929\u6bd4\u8cfd\u662f\u5426\u8209\u884c\uff0c\u5929\u6c23\u56e0\u5b50\u53ef\u80fd\u7ad9\u6bd4\u8f03\u5927\u7684\u56e0\u7d20\uff0c\u800c Co2 
\u7684\u6fc3\u5ea6\u9ad8\u4f4e\u53ef\u80fd\u4f54\u7684\u56e0\u5b50\u7a0b\u5ea6\u8f03\u4f4e\u3002\u56e0\u6b64\u5728\u7b2c\u4e00\u5c64\u7684\u6c7a\u7b56\u4e2d\u4ee5\u5929\u6c23\u7684\u7279\u5fb5\u5148\u9032\u884c\u7b2c\u4e00\u6b21\u7684\u6c7a\u7b56\u5224\u65b7\u3002\u63a5\u8457\u7b2c\u4e8c\u5c64\u518d\u5f9e\u6240\u6709\u7279\u5fb5\u4e2d\u5c0b\u627e\u6700\u9069\u5408\u7684\u6c7a\u7b56\u56e0\u5b50\uff0c\u76f4\u5230\u8a2d\u5b9a\u7684\u6700\u5927\u6a39\u7684\u6df1\u5ea6\u5373\u505c\u6b62\u6a39\u7684\u751f\u9577\u3002","title":"\u6c7a\u7b56\u6a39"},{"location":"12.\u6c7a\u7b56\u6a39/#_3","text":"\u6c7a\u7b56\u6a39\u662f\u4ee5\u4e00\u500b\u8caa\u5a6a\u6cd5\u5247\u4f86\u6c7a\u5b9a\u6bcf\u4e00\u5c64\u8981\u554f\u4ec0\u9ebc\u554f\u984c\uff0c\u76ee\u6a19\u662f\u5206\u985e\u904e\u5f8c\u6bcf\u4e00\u7fa4\u80fd\u5920\u5f88\u660e\u986f\u7684\u77e5\u9053\u662f\u5c6c\u65bc\u54ea\u4e00\u7a2e\u985e\u5225\u3002\u5ef6\u7e8c\u4e0a\u9762\u7684\u4f8b\u5b50\uff0c\u4ee5\u5206\u985e\u554f\u984c\u4f86\u8aaa\u5047\u8a2d\u8981\u8a55\u4f30\u660e\u5929\u6bd4\u8cfd\u662f\u5426\u8209\u884c\u3002\u5728\u6a39\u7684\u7b2c\u4e00\u5c64\u7bc0\u9ede\u4e2d\u6211\u5011\u8981\u5f9e\u5df2\u77e5\u7684\u5169\u500b\u7279\u5fb5\u5206\u5225\u662f\u6eab\u5ea6\u8207\u5929\u6c23\u9078\u4e00\u500b\u4f5c\u70ba\u8a72\u5c64\u7684\u6c7a\u7b56\u56e0\u5b50\u3002\u5047\u8a2d\u76ee\u524d\u8a13\u7df4\u96c6\u6709\u4e94\u7b46\u8cc7\u6599\uff0c\u5176\u4e2d\u6b63\u5e38\u8209\u884c\u7684\u6709\u5169\u7b46\u8cc7\u6599\uff0c\u53d6\u6d88\u8209\u884c\u7684\u6709\u4e09\u7b46\u8cc7\u6599\u3002\u5728\u6a39\u7684\u7d50\u69cb\u4e2d\u5de6\u5b50\u6a39\u70ba\u6c7a\u7b56\u6b63\u5e38\u8209\u884c\uff0c\u800c\u53f3\u5b50\u6a39\u662f\u6c7a\u7b56\u53d6\u6d88\u8209\u884c\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7576\u7279\u5fb5\u70ba\u5929\u6c23\u7684\u6642\u5019\u53ef\u4ee5\u5f88\u6e05\u695a\u7684\u5c07\u9019\u5169\u985e\u5225\u5b8c\u6574\u5206\u958b\uff0c\u56e0\u6b64\u6211\u5011\u6703\u5c07\u5929\u6c23\u4f5c\u70ba\u9019\u4e00\u5c64\u5224\u65b7
\u7684\u56e0\u5b50\u3002\u9019\u5c31\u662f\u6c7a\u7b56\u6a39\u5728\u751f\u6210\u4e2d\u7684\u8caa\u5a6a\u6a5f\u5236\u3002\u7136\u800c\u8981\u5982\u4f55\u53bb\u5224\u65b7\u6bcf\u6b21\u6c7a\u7b56\u7684\u597d\u58de\uff0c\u5c31\u5fc5\u9808\u4f9d\u9760\u4e82\u5ea6\u7684\u8a55\u4f30\u6307\u6a19\u3002","title":"\u6c7a\u7b56\u6a39\u5982\u4f55\u751f\u6210\uff1f"},{"location":"12.\u6c7a\u7b56\u6a39/#_4","text":"\u6211\u5011\u9700\u8981\u5ba2\u89c0\u7684\u6a19\u6e96\u4f86\u6c7a\u5b9a\u6c7a\u7b56\u6a39\u7684\u6bcf\u500b\u5206\u652f\uff0c\u56e0\u6b64\u6211\u5011\u9700\u8981\u6709\u4e00\u500b\u8a55\u65b7\u7684\u6307\u6a19\u4f86\u5354\u52a9\u6211\u5011\u6c7a\u7b56\u3002\u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u53ef\u4ee5\u4f7f\u7528\u4e0d\u540c\u7684\u6307\u6a19\u4f86\u8a55\u4f30\u5206\u679d\u7684\u597d\u58de\uff0c\u5e38\u898b\u7684\u6c7a\u7b56\u4e82\u5ea6\u8a55\u4f30\u6307\u6a19\u6709 Information gain\u3001Gain ratio\u3001Gini index\u3002\u6211\u5011\u76ee\u6a19\u662f\u5f9e\u8a13\u7df4\u8cc7\u6599\u4e2d\u627e\u51fa\u4e00\u5957\u6c7a\u7b56\u898f\u5247\uff0c\u8b93\u6bcf\u4e00\u500b\u6c7a\u7b56\u80fd\u5920\u4f7f\u8a0a\u606f\u589e\u76ca\u6700\u5927\u5316\u3002\u4ee5\u4e0a\u7684\u6307\u6a19\u90fd\u662f\u5728\u8861\u91cf\u4e00\u500b\u5e8f\u5217\u4e2d\u7684\u6df7\u4e82\u7a0b\u5ea6\uff0c\u5176\u6578\u503c\u8d8a\u9ad8\u4ee3\u8868\u8d8a\u6df7\u4e82\u3002\u7136\u800c\u5728 Sklearn \u5957\u4ef6\u4e2d\u9810\u8a2d\u4f7f\u7528 Gini\u3002 Information gain (\u8cc7\u8a0a\u7372\u5229) Gain ratio (\u7372\u5229\u6bd4\u7387) Gini index (\u5409\u5c3c\u4fc2\u6578) = Gini Impurity (\u5409\u5c3c\u4e0d\u7d14\u5ea6)","title":"\u6c7a\u7b56\u6a39\u7684\u6df7\u4e82\u8a55\u4f30\u6307\u6a19"},{"location":"12.\u6c7a\u7b56\u6a39/#_5","text":"Information Gain 
\u900f\u904e\u5f9e\u8a13\u7df4\u8cc7\u6599\u627e\u51fa\u898f\u5247\uff0c\u8b93\u6bcf\u4e00\u500b\u6c7a\u7b56\u80fd\u5920\u4f7f\u8a0a\u606f\u589e\u76ca\u6700\u5927\u5316\u3002\u5176\u7b97\u6cd5\u4e3b\u8981\u662f\u8a08\u7b97\u71b5\uff0c\u56e0\u6b64\u7d93\u7531\u6c7a\u7b56\u6a39\u5206\u5272\u5f8c\u7684\u8cc7\u8a0a\u91cf\u8981\u8d8a\u5c0f\u8d8a\u597d\u3002\u800c Gini \u7684\u6578\u503c\u8d8a\u5927\u4ee3\u8868\u5e8f\u5217\u4e2d\u7684\u8cc7\u6599\u8d8a\u4e82\uff0c\u6578\u503c\u7686\u70ba 0~1 \u4e4b\u9593\uff0c\u5176\u4e2d 0 \u4ee3\u8868\u8a72\u7279\u5fb5\u5728\u5e8f\u5217\u4e2d\u662f\u5b8c\u7f8e\u7684\u5206\u985e\u3002\u5e38\u898b\u7684\u8cc7\u8a0a\u91cf\u8a55\u4f30\u65b9\u6cd5\u6709\u5169\u7a2e\uff1a\u8cc7\u8a0a\u7372\u5229 (Information Gain) \u4ee5\u53ca Gini \u4e0d\u7d14\u5ea6 (Gini Impurity)\u3002","title":"\u8a55\u4f30\u5206\u5272\u8cc7\u8a0a\u91cf"},{"location":"12.\u6c7a\u7b56\u6a39/#entropy","text":"\u71b5 (Entropy) \u662f\u8a08\u7b97 Information Gain \u7684\u4e00\u7a2e\u65b9\u6cd5\u3002\u5728\u4e86\u89e3 Information Gain \u4e4b\u524d\u8981\u5148\u4e86\u89e3\u71b5\u662f\u5982\u4f55\u88ab\u8a08\u7b97\u51fa\u4f86\u7684\u3002\u5176\u4e2d\u5728\u4e0b\u5716\u516c\u5f0f\u4e2d p \u4ee3\u8868\u662f\u7684\u6a5f\u7387\u3001q \u4ee3\u8868\u5426\u7684\u6a5f\u7387\u3002\u6211\u5011\u53ef\u4ee5\u5f9e\u5716\u4e2d\u7bc4\u4f8b\u5f88\u6e05\u695a\u5730\u77e5\u9053\u7576\u6240\u6709\u7684\u8cc7\u6599\u90fd\u88ab\u5206\u985e\u4e00\u81f4\u7684\u6642\u5019 Entropy \u5373\u70ba 0\uff0c\u7576\u8cc7\u6599\u5404\u6709\u4e00\u534a\u4e0d\u540c\u6642 Entropy \u5373\u70ba 1\u3002","title":"\u71b5 (Entropy)"},{"location":"12.\u6c7a\u7b56\u6a39/#gini-gini-impurity","text":"Gini \u4e0d\u7d14\u5ea6\u662f\u53e6\u4e00\u7a2e\u4e82\u5ea6\u7684\u8861\u91cf\u65b9\u5f0f\uff0c\u5b83\u7684\u6578\u5b57\u8d8a\u5927\u4ee3\u8868\u5e8f\u5217\u4e2d\u7684\u8cc7\u6599\u8d8a\u6df7\u4e82\u3002\u516c\u5f0f\u5982\u4e0b\u6240\u793a\uff0c\u5176\u4e2d p \u4ee3\u8868\u662f\u7684\u6a5f\u7387\u3001q 
\u70ba\u4ee3\u8868\u5426\u7684\u6a5f\u7387\u3002\u6211\u5011\u53ef\u4ee5\u5f9e\u5716\u4e2d\u7bc4\u4f8b\u5f88\u6e05\u695a\u5730\u77e5\u9053\u7576\u6240\u6709\u7684\u8cc7\u6599\u90fd\u88ab\u5206\u985e\u4e00\u81f4\u7684\u6642\u5019\u6df7\u4e82\u7a0b\u5ea6\u5373\u70ba 0\uff0c\u7576\u8cc7\u6599\u5404\u6709\u4e00\u534a\u4e0d\u540c\u6642\u6df7\u4e82\u7a0b\u5ea6\u5373\u70ba 0.5\u3002","title":"Gini \u4e0d\u7d14\u5ea6 (Gini Impurity)"},{"location":"12.\u6c7a\u7b56\u6a39/#_6","text":"\u6c7a\u7b56\u6a39\u8ff4\u6b78\u65b9\u6cd5\u8207\u5206\u985e\u6709\u9ede\u985e\u4f3c\u5dee\u5225\u50c5\u5728\u65bc\u8a55\u4f30\u5206\u679d\u597d\u58de\u7684\u65b9\u5f0f\u4e0d\u540c\uff0c\u6211\u5011\u53c8\u53ef\u4ee5\u7a31\u4f5c\u8ff4\u6b78\u6a39\u3002\u7576\u6578\u64da\u96c6\u7684\u8f38\u51fa\u7232\u9023\u7e8c\u6027\u6578\u503c\u6642\uff0c\u8a72\u6a39\u7b97\u6cd5\u5c31\u662f\u4e00\u500b\u8ff4\u6b78\u6a39\u3002\u900f\u904e\u6a39\u7684\u5c55\u958b\uff0c\u4e26\u7528\u8449\u7bc0\u9ede\u7684\u5747\u503c\u4f5c\u7232\u9810\u6e2c\u503c\u3002\u5f9e\u6839\u7bc0\u9ede\u958b\u59cb\uff0c\u5c0d\u6a23\u672c\u7684\u67d0\u4e00\u7279\u5fb5\u9032\u884c\u6e2c\u8a66\u3002\u7d93\u904e\u8a55\u4f30\u5f8c\uff0c\u5c07\u6a23\u672c\u5206\u914d\u5230\u5176\u5b50\u7d50\u9ede\u3002\u6b64\u6642\u6bcf\u4e00\u500b\u5b50\u7bc0\u9ede\u5c0d\u61c9\u8457\u8a72\u7279\u5fb5\u7684\u4e00\u500b\u503c\u3002\u4f9d\u7167\u9019\u6a23\u65b9\u5f0f\u9032\u884c\uff0c\u76f4\u81f3\u5230\u9054\u8449\u7d50\u9ede\u3002\u6b64\u6642\u8aa4\u5dee\u503c\u8981\u6700\u5c0f\u5316\uff0c\u4e26\u4e14\u8d8a\u63a5\u8fd1\u96f6\u8d8a\u597d\u3002 \u8ff4\u6b78\u6a39\u7684\u751f\u9577\u904e\u7a0b\u5f88\u63a8\u85a6\u770b \u9019\u7bc7 \u6587\u7ae0 \u4ee5\u4e0b\u8209\u4e00\u500b\u4f8b\u5b50\u5047\u8a2d x \u662f\u8f38\u5165 y 
\u662f\u8f38\u51fa\uff0c\u6211\u5011\u53ef\u4ee5\u5728\u4e00\u500b\u5e73\u9762\u4e0a\u7e6a\u88fd\u51fa\u8cc7\u6599\u8207\u6b63\u78ba\u7b54\u6848\u9593\u7684\u5206\u4f48\u3002\u5047\u8a2d\u8ff4\u6b78\u6a39\u7684\u6700\u5927\u6df1\u5ea6\u8a2d\u5b9a\u5169\u5c64\u3002\u9996\u5148\u5728\u7b2c\u4e00\u5c64\u4e2d\u6703\u5c07\u6240\u6709\u7684\u8cc7\u6599\u5f9e\u4e2d\u9593\u5207\u4e00\u5200\u6b64\u65b7\u9ede\u70ba x=0.496 \u7576\u5927\u65bc\u8a2d\u5b9a\u7684\u503c\u7684\u6578\u64da\u9ede\u6703\u7e7c\u7e8c\u5f80\u53f3\u5b50\u6a39\u4e0b\u53bb\u5ef6\u4f38\uff0c\u53cd\u4e4b\u5c0f\u65bc 0.496 \u7684\u8cc7\u6599\u9ede\u6703\u5f80\u5de6\u5b50\u6a39\u8d70\u3002\u6b64\u6642\u5c07\u6703\u5207\u51fa\u4e00\u500b\u5206\u652f\u51fa\u4f86\u4e26\u5f80\u4e0b\u64f4\u5c55\u4e26\u5f62\u6210\u7b2c\u4e8c\u5c64\u7684\u6c7a\u7b56\u5206\u652f\u3002\u4e00\u76f4\u4e0d\u65b7\u6301\u7e8c\u62d3\u5c55\u76f4\u5230\u8a2d\u5b9a\u7684\u6700\u5927\u6df1\u5ea6\u7d42\u6b62\uff0c\u6b64\u6642\u7684\u7bc0\u9ede\u5373\u70ba\u8449\u7bc0\u9ede\u4e5f\u5c31\u662f\u6700\u7d42\u7684\u6a21\u578b\u8f38\u51fa\u503c\u3002","title":"\u8ff4\u6b78\u6a39"},{"location":"12.\u6c7a\u7b56\u6a39/#_7","text":"\u5047\u8a2d\u6211\u5011\u751f\u6210\u4e00\u500b f(x) = 3x+15 + noise \u7684\u8cc7\u6599\uff0c\u5176\u4e2d noise \u70ba\u4e00\u500b 0~1 \u4e4b\u9593\u7684\u96a8\u6a5f\u6578\u3002\u5f9e\u4ee5\u4e0b\u7684\u6e2c\u8a66\u53ef\u4ee5\u770b\u51fa\u96a8\u8457\u6c7a\u7b56\u6a39\u6df1\u5ea6\u7684\u589e\u52a0\uff0c\u6c7a\u7b56\u6a39\u7684\u64ec\u5408\u80fd\u529b\u4e0d\u65b7\u4e0a\u5347\u3002\u6c7a\u7b56\u6a39\u5df2\u7d93\u4e0d\u50c5\u50c5\u64ec\u5408\u4e86\u6211\u5011\u7684\u7dda\u6027\u51fd\u5f0f 3x+15 
\uff0c\u540c\u6642\u4e5f\u64ec\u5408\u4e86\u6211\u5011\u6dfb\u52a0\u7684\u566a\u97f3(noise)\u3002","title":"\u6a39\u8d8a\u6df1\u6a21\u578b\u8d8a\u8907\u96dc"},{"location":"12.\u6c7a\u7b56\u6a39/#_8","text":"\u5728\u5206\u985e\u6a21\u578b\u4e2d\u6c7a\u7b56\u6a39\u662f\u4ee5\u4e82\u5ea6\u4f5c\u70ba\u6c7a\u7b56\u6a39\u751f\u6210\u6642\u5019\u7684\u8a55\u4f30\u6307\u6a19\u3002\u4f46\u662f\u8ff4\u6b78\u6a39\u662f\u900f\u904e MSE \u6216 MAE \u4f86\u8a55\u4f30\u6a21\u578b\uff0c\u4e26\u627e\u51fa\u8aa4\u5dee\u6700\u5c0f\u7684\u503c\u4f5c\u70ba\u6a39\u7684\u7279\u5fb5\u9078\u64c7\u8207\u5207\u5272\u9ede\u3002\u5176\u4e2d\u524d\u8005\u662f\u5747\u65b9\u5dee\uff0c\u5f8c\u8005\u662f\u548c\u5747\u503c\u4e4b\u5dee\u7684\u7d55\u5c0d\u503c\u4e4b\u548c\u3002","title":"\u8ff4\u6b78\u6a39\u8a72\u5982\u4f55\u9078\u64c7\u5207\u5272\u9ede?"},{"location":"12.\u6c7a\u7b56\u6a39/#cart","text":"\u5728 Sklearn \u5957\u4ef6\u4e2d\u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u662f\u63a1\u7528 CART (Classification and Regression Tree) \u6f14\u7b97\u6cd5\uff0c\u4e26\u4e14\u53ef\u4ee5\u88ab\u62ff\u4f86\u505a\u5206\u985e\u548c\u8ff4\u6b78\u7684\u9810\u6e2c\u3002\u5728\u6c7a\u7b56\u6a39\u7684\u6bcf\u4e00\u500b\u7bc0\u9ede\u4e0a\u90fd\u662f\u63a1\u7528\u4e8c\u5206\u6cd5\uff0c\u4e5f\u5c31\u662f\u6bcf\u4e00\u500b\u6c7a\u7b56\u7bc0\u9ede\u53ea\u5206\u679d\u51fa\u5169\u500b\u5b50\u7bc0\u9ede\u3002\u4e26\u4e14\u4e0d\u65b7\u5730\u5f80\u4e0b\u62d3\u5c55\uff0c\u76f4\u5230\u8a2d\u5b9a\u7684\u6700\u5927\u6df1\u5ea6\u70ba\u6b62\uff0c\u6b64\u6642\u6700\u5927\u6df1\u5ea6\u7684\u7bc0\u9ede\u7a31\u70ba\u8449\u7bc0\u9ede\u5373\u70ba\u6a21\u578b\u7684\u9810\u6e2c\u8f38\u51fa\u3002","title":"CART 
\u6c7a\u7b56\u6a39"},{"location":"12.\u6c7a\u7b56\u6a39/#_9","text":"\u5efa\u7acb\u6c7a\u7b56\u6a39\u7684\u904e\u7a0b\u5c31\u662f\u4e0d\u65b7\u7684\u5c0b\u627e\u7279\u5fb5\u9032\u884c\u6c7a\u7b56\uff0c\u900f\u904e\u9019\u4e9b\u6c7a\u7b56\u76e1\u91cf\u7684\u4f7f\u9019\u4e9b\u8cc7\u6599\u88ab\u5206\u70ba\u540c\u4e00\u500b\u985e\u5225\uff0c\u4e14\u8a66\u8457\u8b93\u6df7\u4e82\u7a0b\u5ea6\u8d8a\u5c0f\u8d8a\u597d\u3002\u5207\u8a18\u6a39\u7684\u6df1\u5ea6\u8d8a\u6df1\u4e0d\u4e00\u5b9a\u8d8a\u597d\uff0c\u4ed6\u53ef\u80fd\u6703\u9020\u6210\u904e\u5ea6\u64ec\u5408\u7684\u554f\u984c\u3002\u8a13\u7df4\u597d\u7684\u6a21\u578b\u6211\u5011\u80fd\u5920\u8996\u89ba\u5316\u6c7a\u7b56\u6a39\u7684\u7d50\u69cb\uff0c\u76f8\u5c0d\u7684\u53ef\u89e3\u91cb\u6027\u5c31\u8b8a\u9ad8\u3002\u6b64\u5916\u8207\u5176\u5b83\u7684ML\u6a21\u578b\u6bd4\u8f03\u8d77\u4f86\uff0c\u6c7a\u7b56\u6a39\u57f7\u884c\u901f\u5ea6\u662f\u5b83\u7684\u4e00\u5927\u512a\u52e2\u3002\u56e0\u70ba\u662f\u6a39\u72c0\u7d50\u69cb\uff0c\u56e0\u6b64\u5728\u9032\u884c\u6a5f\u5668\u5b78\u7fd2\u7684\u6642\u5019\u6bcf\u500b\u6c7a\u7b56\u968e\u6bb5\u90fd\u76f8\u7576\u7684\u660e\u78ba\u6e05\u695a\uff0c\u4e0d\u662f 0 \u5c31\u662f 
1\u3002","title":"\u6c7a\u7b56\u6a39\u6a21\u578b\u7684\u512a\u7f3a\u9ede"},{"location":"12.\u6c7a\u7b56\u6a39/#_10","text":"\u6c7a\u7b56\u6a39\u900f\u904e\u6240\u6709\u7279\u5fb5\u8207\u5c0d\u61c9\u7684\u503c\u5c07\u8cc7\u6599\u5207\u5206\uff0c\u4f86\u627e\u51fa\u6700\u9069\u5408\u7684\u5206\u679d\u4e26\u7e7c\u7e8c\u5f80\u4e0b\u62d3\u5c55\u3002\u82e5\u6c7a\u7b56\u6a39\u6df1\u5ea6\u8d8a\u6df1\u5247\u6c7a\u7b56\u7684\u898f\u5247\u5c07\u8d8a\u8907\u96dc\uff0c\u6a21\u578b\u9810\u6e2c\u4e5f\u6703\u8d8a\u63a5\u8fd1\u771f\u5be6\u7b54\u6848\u3002\u4f46\u82e5\u8a13\u7df4\u96c6\u4e2d\u542b\u6709\u904e\u591a\u7684\u96dc\u8a0a\uff0c\u592a\u6df1\u7684\u6a39\u5c31\u6709\u53ef\u80fd\u7522\u751f\u904e\u64ec\u5408\u7684\u60c5\u5f62\u3002\u56e0\u6b64\u55ae\u4e00\u7684\u6c7a\u7b56\u6a39\u80af\u5b9a\u662f\u4e0d\u5920\u7528\u7684\uff0c\u6211\u5011\u53ef\u4ee5\u5229\u7528\u96c6\u6210\u5b78\u7fd2\u4e2d\u7684 Boosting \u67b6\u69cb\uff0c\u5c0d\u8ff4\u6b78\u6a39\u9032\u884c\u6539\u826f\u5347\u7d1a\u3002","title":"\u6c7a\u7b56\u6a39\u7e3d\u7d50"},{"location":"12.\u6c7a\u7b56\u6a39/#_11","text":"","title":"[\u7a0b\u5f0f\u5be6\u4f5c]"},{"location":"12.\u6c7a\u7b56\u6a39/#_12","text":"\u4e00\u500b\u6c7a\u7b56\u6a39\u6703\u6839\u64da\u8a13\u7df4\u8cc7\u6599\u81ea\u52d5\u7522\u751f\u4e00\u68f5\u6a39\u3002\u6c7a\u7b56\u6a39\u6703\u6839\u64da\u8cc7\u6599\u7522\u751f\u5f88\u591a\u6a39\u72c0\u7684\u898f\u5247\uff0c\u6700\u7d42\u8a13\u7df4\u51fa\u4f86\u7684\u898f\u5247\u6703\u5c0d\u65b0\u6a23\u672c\u9032\u884c\u9810\u6e2c\u3002 Parameters: - criterion: \u4e82\u5ea6\u7684\u8a55\u4f30\u6a19\u6e96\uff0cgini/entropy\u3002\u9810\u8a2d\u70bagini\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\u3002 - splitter: \u7279\u5fb5\u5283\u5206\u9ede\u9078\u64c7\u6a19\u6e96\uff0cbest/random\u3002\u9810\u8a2d\u70babest\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\uff0csplitter=random \u624d\u6709\u7528\u3002 - min_samples_split: 
\u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u518d\u5206 - min_samples_leaf: \u5206\u5b8c\u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u5206 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 - get_depth: \u53d6\u5f97\u6a39\u7684\u6df1\u5ea6\u3002 from sklearn.tree import DecisionTreeClassifier # \u5efa\u7acb DecisionTreeClassifier \u6a21\u578b decisionTreeModel = DecisionTreeClassifier ( criterion = 'entropy' , max_depth = 6 , random_state = 42 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b decisionTreeModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = decisionTreeModel . predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = decisionTreeModel . 
score ( train_reduced , y_train ) \u6211\u5011\u900f\u904e\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u9032\u884c PCA \u964d\u7dad\u4e26\u8a13\u7df4\u4e00\u500b\u6c7a\u7b56\u6a39\u6a21\u578b\u3002\u900f\u904e\u7e6a\u88fd\u8a13\u7df4\u6c7a\u7b56\u908a\u754c\u53ef\u4ee5\u770b\u5230\uff0c\u5728\u4e0b\u5716\u53f3\u624b\u908a\u7684\u8a13\u7df4\u96c6\u5b8c\u6574\u5730\u5c07\u4e09\u500b\u985e\u5225\u5207\u5272\u958b\u4f86\u3002\u800c\u5728\u53f3\u908a\u7684\u6e2c\u8a66\u96c6\u4e2d\u50c5\u6709\u4e00\u7b46\u7d05\u8272\u6846\u8d77\u4f86\u7684\u8cc7\u6599\u9810\u6e2c\u932f\u8aa4\u3002","title":"\u5206\u985e\u6c7a\u7b56\u6a39"},{"location":"12.\u6c7a\u7b56\u6a39/#_13","text":"Parameters: - criterion: \u8a55\u4f30\u5207\u5272\u9ede\u6307\u6a19\uff0cmse/friedman_mse/mae\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\u3002 - splitter: \u7279\u5fb5\u5283\u5206\u9ede\u9078\u64c7\u6a19\u6e96\uff0cbest/random\u3002\u9810\u8a2d\u70babest\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\uff0csplitter=random \u624d\u6709\u7528\u3002 - min_samples_split: \u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u518d\u5206 - min_samples_leaf: \u5206\u5b8c\u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u5206 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - get_depth: \u53d6\u5f97\u6a39\u7684\u6df1\u5ea6\u3002 from sklearn.tree import DecisionTreeRegressor # \u5efa\u7acb DecisionTreeRegressor \u6a21\u578b decisionTreeModel = DecisionTreeRegressor ( criterion = 'mse' , max_depth = 4 , splitter = 'best' , random_state = 42 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b decisionTreeModel . 
fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = decisionTreeModel . predict ( x ) \u5728\u8ff4\u6b78\u6c7a\u7b56\u6a39\u4e2d\u6211\u5011\u4f7f\u7528\u4e86\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u8207\u975e\u7dda\u6027\u8ff4\u6b78\u5169\u7a2e\u8cc7\u6599\u96c6\u9032\u884c\u6578\u64da\u64ec\u5408\u5be6\u9a57\u3002\u5728\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u4e2d\u6211\u5011\u5c07\u6578\u64da\u9ede\u6dfb\u52a0\u4e00\u4e9b\u566a\u97f3\u8b93\u8cc7\u6599\u5206\u5e03\u5728\u659c\u76f4\u7dda\u4e0a\u3002\u5de6\u5716\u662f\u8ff4\u6b78\u6a39\u5728\u6700\u5927\u6df1\u5ea6\u70ba 4 \u7684\u8a13\u7df4\u7d50\u679c\uff0c\u53ef\u4ee5\u96b1\u7d04\u5730\u770b\u5230\u6a21\u578b\u6c7a\u7b56\u7684\u65b9\u5f0f\u5448\u73fe\u968e\u68af\u72c0\u614b\u3002\u5982\u679c\u6211\u5011\u5617\u8a66\u7684\u5c07\u6a39\u7684\u6df1\u5ea6\u589e\u52a0\uff0c\u6a21\u578b\u76f8\u5c0d\u8907\u96dc\u56e0\u6b64\u53ef\u4ee5\u64ec\u5408\u5f97\u66f4\u597d\u3002\u800c\u53f3\u908a\u662f\u900f\u904e\u96a8\u6a5f\u7522\u751f\u7684\u975e\u7dda\u6027\u8cc7\u6599\u9032\u884c\u6a21\u578b\u8a13\u7df4\u3002\u5f9e\u8a13\u7df4\u7d50\u679c\u53ef\u4ee5\u767c\u73fe\u5728\u6700\u5927\u6df1\u5ea6\u70ba 4 \u7684\u6642\u5019\uff0c\u8a13\u7df4\u7d50\u679c\u5c31\u9084\u4e0d\u932f\u4e86\u3002\u5927\u5bb6\u53ef\u4ee5\u8a66\u770b\u770b\u8abf\u6574\u6a21\u578b\u7684\u6a39\u6700\u5927\u6df1\u5ea6\u4ee5\u53ca\u5176\u4ed6\u7684\u8d85\u53c3\u6578\u5c0d\u6a21\u578b\u8a13\u7df4\u7d50\u679c\u7684\u5f71\u97ff\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u8ff4\u6b78\u6c7a\u7b56\u6a39"},{"location":"13.\u6574\u9ad4\u5b78\u7fd2/","text":"[Day 13] \u6574\u9ad4\u5b78\u7fd2 (Ensemble Learning) \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u4e86\u89e3\u6574\u9ad4\u5b78\u7fd2 \u4f55\u8b02\u6574\u9ad4\u5b78\u7fd2\uff1f \u4e09\u7a2e\u4e0d\u540c\u7684\u6574\u9ad4\u5b78\u7fd2 Bagging\u3001Boosting\u3001Stacking 
\u4f55\u8b02\u6574\u9ad4\u5b78\u7fd2\uff1f \u6574\u9ad4\u5b78\u7fd2 (Ensemble Learning) \u53c8\u7a31\u96c6\u6210\u5b78\u7fd2\u3001\u6574\u5408\u5b78\u7fd2\uff0c\u6307\u7684\u662f\u4ee5\u4e00\u500b\u7cfb\u7d71\u5316\u7684\u65b9\u5f0f\u5c07\u597d\u5e7e\u500b\u76e3\u7763\u5f0f\u5b78\u7fd2\u7684\u6a21\u578b\u7d50\u5408\u5728\u4e00\u8d77\uff0c\u76ee\u7684\u662f\u5e0c\u671b\u7d50\u5408\u773e\u591a\u7684\u6a21\u578b\u7522\u751f\u4e00\u500b\u66f4\u5f37\u5927\u7684\u6a21\u578b\u3002\u5728\u8a31\u591a\u79d1\u5b78\u7af6\u8cfd\u4e2d Ensemble Learning \u5728\u5be6\u52d9\u4e0a\u662f\u975e\u5e38\u6709\u6548\u7684\u63d0\u5347\u9810\u6e2c\u6e96\u78ba\u7387\u3002\u4f9d\u7167 Ensemble \u7684\u8655\u7406\u65b9\u5f0f\u7684\u4e0d\u540c\uff0c\u6211\u5011\u53ef\u4ee5\u5c07\u5b83\u5206\u70ba\u4e09\u985e\u3002\u7b2c\u4e00\u985e\u70ba Bagging\uff0c\u7b2c\u4e8c\u985e\u70ba Boosting\uff0c\u7b2c\u4e09\u985e\u70ba Stacking\u3002 Bagging: Random forest Boosting: AdaBoost Gradient Boosting XGBoost Stacking Bagging \u81ea\u52a9\u91cd\u62bd\u7e3d\u5408\u6cd5 Bagging \u6307\u7684\u662f\u6211\u5011\u628a\u8a13\u7df4\u8cc7\u6599\u91cd\u65b0\u63a1\u6a23\u7522\u751f\u4e0d\u540c\u7d44\u7684\u8a13\u7df4\u8cc7\u6599\uff0c\u5982\u4e0b\u5716\u70ba\u6574\u9ad4\u5b78\u7fd2 Bagging \u4e4b\u67b6\u69cb\u3002\u6839\u64da\u4e0d\u540c\u7d44\u7684\u8a13\u7df4\u8cc7\u6599\u5373\u4f7f\u6211\u5011\u7528\u540c\u4e00\u7a2e\u6f14\u7b97\u6cd5\u6211\u5011\u4e5f\u6703\u5f97\u5230\u4e0d\u4e00\u6a23\u7684\u6a21\u578b\uff0c\u4ed6\u7684\u6a39\u662f\u5404\u81ea\u7368\u7acb\u56e0\u6b64\u53ef\u4ee5\u5e73\u884c\u5316\u8655\u7406\u3002\u4ee3\u8868\u7684\u65b9\u6cd5\u662f\u96a8\u6a5f\u68ee\u6797\uff0c\u96a8\u6a5f\u68ee\u6797\u9664\u4e86 Bagging \u4e4b\u5916\uff0c\u9084\u6709\u53e6\u4e00\u500b\u96a8\u6a5f\u7684\u56e0\u7d20\u662f\u6bcf\u4e00\u68f5\u6a39\u90fd\u53ea\u80fd\u770b\u5230\u4e00\u90e8\u5206\u7684\u7279\u5fb5\uff0c\u9019\u4e9b\u7279\u5fb5\u662f\u7531\u96a8\u6a5f\u6c7a\u5b9a\u7684\u3002 Boosting \u63a8\u5347\u6cd5 Boosting 
\u5247\u6703\u6839\u64da\u6bcf\u4e00\u7b46\u8a13\u7df4\u8cc7\u6599\u7684\u96e3\u6216\u7c21\u55ae\u7d66\u4e88\u4e0d\u540c\u7684\u6b0a\u91cd\u3002\u5982\u4e0b\u5716\u6240\u793a\uff0c\u9996\u5148\u6211\u5011\u6703\u8a13\u7df4\u4e00\u500b base learner \u7136\u5f8c\u6839\u64da base learner \u9810\u6e2c\u7684\u7d50\u679c\u5c0d\u6216\u932f\u4f86\u5206\u8fa8\u8a72\u7b46\u8cc7\u6599\u662f\u4e00\u500b\u7c21\u55ae\u9084\u662f\u56f0\u96e3\u7684\u8cc7\u6599\u3002\u5c0d\u65bc\u96e3\u7684\u8cc7\u6599\u6211\u5011\u52a0\u5f37\u4ed6\u7684\u6b0a\u91cd\u518d\u8a13\u7df4\u4e00\u500b\u65b0\u7684\u5206\u985e\u5668\u6216\u8ff4\u6b78\u5668\u3002\u6211\u5011\u76ee\u6a19\u662f\u5e0c\u671b\u8a13\u7df4\u5f8c\uff0c\u65b0\u7684\u6a21\u578b\u5728\u9019\u4e9b\u96e3\u7684\u8cc7\u6599\u80fd\u5920\u8868\u73fe\u5f97\u66f4\u597d\u3002\u6211\u5011\u4e0d\u65b7\u91cd\u8907\u9019\u4e9b\u6b65\u9a5f\uff0c\u4e0d\u65b7\u5730\u52a0\u5165\u65b0\u7684 base learner\uff0c\u4e14\u65b0\u7684 base learner \u628a\u904e\u53bb\u8868\u73fe\u4e0d\u597d\u7684\u5730\u65b9\u6539\u5584\uff0c\u9019\u5c31\u662f Boosting \u7cbe\u795e\u3002\u56e0\u6b64 Boosting \u7684\u6bcf\u4e00\u68f5\u6a39\u662f\u4e92\u76f8\u6709\u95dc\u806f\u6027\u7684\u505a\u5b8c\u7b2c\u4e00\u68f5\u6a39\u624d\u80fd\u9032\u884c\u4e0b\u4e00\u68f5\u6a39\u7684\u751f\u6210\u3002\u4ee3\u8868\u7684\u65b9\u6cd5\u6709 AdaBoost\u3001Gradient Boosting\uff0c\u5169\u7a2e\u90fd\u662f\u7522\u751f\u975e\u5e38\u591a\u68f5\u7684\u6a39\uff0c\u4f46\u662f\u6bcf\u4e00\u68f5\u90fd\u662f\u5f88\u7c21\u55ae\u7684\u6c7a\u7b56\u6a39\u3002Boosting \u76ee\u6a19\u662f\u5e0c\u671b\u65b0\u7684\u6a39\u53ef\u4ee5\u91dd\u5c0d\u820a\u7684\u6a39\u9810\u6e2c\u4e0d\u592a\u597d\u7684\u90e8\u5206\u505a\u4e00\u4e9b\u88dc\u5f37\u3002\u6240\u4ee5\u6700\u7d42\u6211\u5011\u8981\u628a\u9019\u9ebc\u591a\u7c21\u55ae\u7684\u6a39\u5408\u5728\u4e00\u8d77\u624d\u80fd\u7576\u6700\u5f8c\u7684\u9810\u6e2c\u3002 Stacking \u5806\u758a\u6cd5 \u4e0b\u5716\u70ba\u6574\u9ad4\u5b78\u7fd2 Stacking \u67b6\u69cb\u3002Stacking 
\u9996\u5148\u7522\u751f\u51fa m \u500b\u6a21\u578b\uff0c\u5f7c\u6b64\u9593\u4e26\u4e92\u76f8\u7121\u95dc\u9023\uff0c\u4f8b\u5982\u7b2c\u4e00\u500b\u6a21\u578b\u70ba logistic regression \u7b2c\u4e8c\u500b\u70ba\u6c7a\u7b56\u6a39\u3002\u8a13\u7df4\u5b8c m \u500b\u6a21\u578b\u5f8c\uff0c\u6211\u5011\u8981\u628a\u9019 m \u500b\u6a21\u578b\u5408\u4f75\u5728\u4e00\u8d77\u3002\u5408\u4f75\u7684\u65b9\u5f0f\u662f\u6211\u5011\u518d\u53e6\u5916\u8a13\u7df4\u4e00\u500b\u6a21\u578b\uff0c\u9019\u500b\u6a21\u578b\u628am\u500b\u6a21\u578b\u7684\u8f38\u51fa\u7576\u6210\u65b0\u7684\u6a21\u578b\u7684\u8f38\u5165\u56e0\u6b64\u6211\u5011\u6703\u6839\u64da\u9019m\u500b\u7279\u5fb5\u5229\u7528\u6574\u9ad4\u5b78\u7fd2\u5176\u4e2d\u7684\u6f14\u7b97\u6cd5\u4f86\u5b78\u7fd2\u4e00\u500b\u6a21\u578b\u4e26\u9810\u6e2c\u6700\u7d42\u7d50\u679c\u3002 \u5340\u57df\u5b78\u7fd2 (Patch Learning) \u5340\u57df\u5b78\u7fd2 ( Patch Learning , PL) \u65bc 2019 \u5e74\u7531\u7f8e\u570b\u5357\u52a0\u5dde\u5927\u5b78 Jerry M. Mendel \u8207 Dongrui Wu \u6240\u63d0\u51fa\u7684\u4e00\u7a2e\u6a5f\u5668\u5b78\u7fd2\u65b9\u6cd5\u3002\u6240\u8b02\u7684\u5340\u57df\u5b78\u7fd2\u662f\u80fd\u5920\u6709\u6548\u7684\u638c\u63e1\u8868\u73fe\u4e0d\u597d\u7684\u5340\u9593\uff0c\u7d93\u904e\u8a13\u7df4\u4e00\u500b\u5168\u57df\u7684\u6a21\u578b\u5f8c\u4e26\u4efb\u4e00\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u627e\u51fa\u9019\u4e9b\u8aa4\u5dee\u5927\u7684 Patch\uff0c\u900f\u904e\u591a\u500b\u65b7\u9ede\u7684\u5b78\u7fd2\u6211\u5011\u6703\u5f97\u5230 Global Model\u3001Patch1 Model\u3001Patch2 Model\u2026Patch(n) 
Model\u3002\u7136\u800c\u5728\u6a5f\u5668\u5b78\u7fd2\u4e2d\u6211\u5011\u6709\u5f88\u591a\u7a2e\u65b9\u6cd5\u53ef\u4ee5\u6539\u5584\u6211\u5011\u7684\u6a21\u578b\uff0c\u4f8b\u5982\u52a0\u6df1\u548c\u52a0\u5bec\u795e\u7d93\u7db2\u8def\u6216\u662f\u6dfb\u52a0\u4e00\u4e9b\u975e\u7dda\u6027\u7684\u6fc0\u52f5\u51fd\u6578\u4f86\u6700\u4f73\u5316\u6211\u5011\u7684\u6a21\u578b\u3002\u6216\u662f\u4f7f\u7528\u6574\u9ad4\u5b78\u7fd2\u7684\u65b9\u6cd5\u96c6\u5408\u8a31\u591a\u7b56\u7565\uff0c\u6700\u7d42\u5f62\u6210\u4e00\u500b\u5f37\u5b78\u7fd2\u5668\u4e26\u6539\u5584\u67d0\u4e9b\u5340\u57df\u7684\u5f31\u9ede\u3002\u4e0b\u5716\u70ba\u4e00\u500b\u7c21\u55ae\u7684\u5340\u57df\u5b78\u7fd2\u9810\u6e2c\u6d41\u7a0b\u5716\u3002\u5728\u4f7f\u7528\u5340\u57df\u5b78\u7fd2\u6a21\u578b\u524d\u6211\u5011\u8981\u627e\u51fa\u8a72\u6bb5\u8f38\u5165\u6240\u5c0d\u61c9\u7684 Patch\uff0c\u82e5\u8a72\u5340\u9593\u525b\u597d\u843d\u65bc\u6240\u5283\u5b9a\u7684\u7bc4\u570d\u5167\uff0c\u9019\u4e9b\u8f38\u5165\u5c31\u5c07\u6703\u5c0d\u61c9\u5230\u76f8\u5c0d\u61c9\u7684\u5340\u57df\u5b78\u7fd2\uff0c\u5426\u5247\u5c31\u6703\u4f7f\u7528\u5168\u57df\u6a21\u578b\u9032\u884c\u9810\u6e2c\u3002 \u5340\u57df\u5b78\u7fd2\u6a5f\u5236\u5305\u62ec\u4e09\u500b\u90e8\u5206\uff0c\u7c21\u8981\u8aaa\u660e\u5982\u4e0b: (1)\u4f7f\u7528\u6240\u6709\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u4e00\u500b\u5168\u57df\u6a21\u578b; (2)\u6311\u51fa\u5f71\u97ff\u932f\u8aa4\u7387\u8f03\u9ad8\u7684\u8cc7\u6599\uff0c\u518d\u653e\u5165\u500b\u5225\u7684 Patch \u6a21\u578b\u9032\u884c\u5340\u57df\u8a13\u7df4; (3)\u81ea\u8a13\u7df4\u8cc7\u6599\u4e2d\u53bb\u6389\u5df2\u7d93\u88ab\u5c40\u90e8\u6a21\u578b\u7528\u904e\u7684\u8cc7\u6599\uff0c\u518d\u4f7f\u7528\u5269\u4e0b\u7684\u6240\u6709\u8cc7\u6599\u66f4\u65b0\u5168\u57df\u6a21\u578b\u3002\u7576\u8f38\u5165\u8cc7\u6599\u9032\u4f86\u6642\uff0c\u9996\u5148\u5224\u65b7\u9019\u500b\u8f38\u5165\u662f\u4e0d\u662f\u5728\u525b\u525b\u8a18\u4e0b\u7684 Patch 
\u6a21\u578b\u4e2d\uff0c\u5982\u679c\u662f\u7684\u8a71\uff0c\u5c31\u57f7\u884c Patch \u6a21\u578b\u3002\u5982\u679c\u4e0d\u662f\u7684\u8a71\uff0c\u57f7\u884c\u66f4\u65b0\u5f8c\u7684\u5168\u57df\u6a21\u578b\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 13] \u6574\u9ad4\u5b78\u7fd2 (Ensemble Learning)"},{"location":"13.\u6574\u9ad4\u5b78\u7fd2/#day-13-ensemble-learning","text":"","title":"[Day 13] \u6574\u9ad4\u5b78\u7fd2 (Ensemble Learning)"},{"location":"13.\u6574\u9ad4\u5b78\u7fd2/#_1","text":"\u4e86\u89e3\u6574\u9ad4\u5b78\u7fd2 \u4f55\u8b02\u6574\u9ad4\u5b78\u7fd2\uff1f \u4e09\u7a2e\u4e0d\u540c\u7684\u6574\u9ad4\u5b78\u7fd2 Bagging\u3001Boosting\u3001Stacking","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"13.\u6574\u9ad4\u5b78\u7fd2/#_2","text":"\u6574\u9ad4\u5b78\u7fd2 (Ensemble Learning) \u53c8\u7a31\u96c6\u6210\u5b78\u7fd2\u3001\u6574\u5408\u5b78\u7fd2\uff0c\u6307\u7684\u662f\u4ee5\u4e00\u500b\u7cfb\u7d71\u5316\u7684\u65b9\u5f0f\u5c07\u597d\u5e7e\u500b\u76e3\u7763\u5f0f\u5b78\u7fd2\u7684\u6a21\u578b\u7d50\u5408\u5728\u4e00\u8d77\uff0c\u76ee\u7684\u662f\u5e0c\u671b\u7d50\u5408\u773e\u591a\u7684\u6a21\u578b\u7522\u751f\u4e00\u500b\u66f4\u5f37\u5927\u7684\u6a21\u578b\u3002\u5728\u8a31\u591a\u79d1\u5b78\u7af6\u8cfd\u4e2d Ensemble Learning \u5728\u5be6\u52d9\u4e0a\u662f\u975e\u5e38\u6709\u6548\u7684\u63d0\u5347\u9810\u6e2c\u6e96\u78ba\u7387\u3002\u4f9d\u7167 Ensemble \u7684\u8655\u7406\u65b9\u5f0f\u7684\u4e0d\u540c\uff0c\u6211\u5011\u53ef\u4ee5\u5c07\u5b83\u5206\u70ba\u4e09\u985e\u3002\u7b2c\u4e00\u985e\u70ba Bagging\uff0c\u7b2c\u4e8c\u985e\u70ba Boosting\uff0c\u7b2c\u4e09\u985e\u70ba Stacking\u3002 Bagging: Random forest Boosting: AdaBoost Gradient Boosting XGBoost Stacking","title":"\u4f55\u8b02\u6574\u9ad4\u5b78\u7fd2\uff1f"},{"location":"13.\u6574\u9ad4\u5b78\u7fd2/#bagging","text":"Bagging 
\u6307\u7684\u662f\u6211\u5011\u628a\u8a13\u7df4\u8cc7\u6599\u91cd\u65b0\u63a1\u6a23\u7522\u751f\u4e0d\u540c\u7d44\u7684\u8a13\u7df4\u8cc7\u6599\uff0c\u5982\u4e0b\u5716\u70ba\u6574\u9ad4\u5b78\u7fd2 Bagging \u4e4b\u67b6\u69cb\u3002\u6839\u64da\u4e0d\u540c\u7d44\u7684\u8a13\u7df4\u8cc7\u6599\u5373\u4f7f\u6211\u5011\u7528\u540c\u4e00\u7a2e\u6f14\u7b97\u6cd5\u6211\u5011\u4e5f\u6703\u5f97\u5230\u4e0d\u4e00\u6a23\u7684\u6a21\u578b\uff0c\u4ed6\u7684\u6a39\u662f\u5404\u81ea\u7368\u7acb\u56e0\u6b64\u53ef\u4ee5\u5e73\u884c\u5316\u8655\u7406\u3002\u4ee3\u8868\u7684\u65b9\u6cd5\u662f\u96a8\u6a5f\u68ee\u6797\uff0c\u96a8\u6a5f\u68ee\u6797\u9664\u4e86 Bagging \u4e4b\u5916\uff0c\u9084\u6709\u53e6\u4e00\u500b\u96a8\u6a5f\u7684\u56e0\u7d20\u662f\u6bcf\u4e00\u68f5\u6a39\u90fd\u53ea\u80fd\u770b\u5230\u4e00\u90e8\u5206\u7684\u7279\u5fb5\uff0c\u9019\u4e9b\u7279\u5fb5\u662f\u7531\u96a8\u6a5f\u6c7a\u5b9a\u7684\u3002","title":"Bagging \u81ea\u52a9\u91cd\u62bd\u7e3d\u5408\u6cd5"},{"location":"13.\u6574\u9ad4\u5b78\u7fd2/#boosting","text":"Boosting \u5247\u6703\u6839\u64da\u6bcf\u4e00\u7b46\u8a13\u7df4\u8cc7\u6599\u7684\u96e3\u6216\u7c21\u55ae\u7d66\u4e88\u4e0d\u540c\u7684\u6b0a\u91cd\u3002\u5982\u4e0b\u5716\u6240\u793a\uff0c\u9996\u5148\u6211\u5011\u6703\u8a13\u7df4\u4e00\u500b base learner \u7136\u5f8c\u6839\u64da base learner \u9810\u6e2c\u7684\u7d50\u679c\u5c0d\u6216\u932f\u4f86\u5206\u8fa8\u8a72\u7b46\u8cc7\u6599\u662f\u4e00\u500b\u7c21\u55ae\u9084\u662f\u56f0\u96e3\u7684\u8cc7\u6599\u3002\u5c0d\u65bc\u96e3\u7684\u8cc7\u6599\u6211\u5011\u52a0\u5f37\u4ed6\u7684\u6b0a\u91cd\u518d\u8a13\u7df4\u4e00\u500b\u65b0\u7684\u5206\u985e\u5668\u6216\u8ff4\u6b78\u5668\u3002\u6211\u5011\u76ee\u6a19\u662f\u5e0c\u671b\u8a13\u7df4\u5f8c\uff0c\u65b0\u7684\u6a21\u578b\u5728\u9019\u4e9b\u96e3\u7684\u8cc7\u6599\u80fd\u5920\u8868\u73fe\u5f97\u66f4\u597d\u3002\u6211\u5011\u4e0d\u65b7\u91cd\u8907\u9019\u4e9b\u6b65\u9a5f\uff0c\u4e0d\u65b7\u5730\u52a0\u5165\u65b0\u7684 base learner\uff0c\u4e14\u65b0\u7684 base 
learner \u628a\u904e\u53bb\u8868\u73fe\u4e0d\u597d\u7684\u5730\u65b9\u6539\u5584\uff0c\u9019\u5c31\u662f Boosting \u7cbe\u795e\u3002\u56e0\u6b64 Boosting \u7684\u6bcf\u4e00\u68f5\u6a39\u662f\u4e92\u76f8\u6709\u95dc\u806f\u6027\u7684\u505a\u5b8c\u7b2c\u4e00\u68f5\u6a39\u624d\u80fd\u9032\u884c\u4e0b\u4e00\u68f5\u6a39\u7684\u751f\u6210\u3002\u4ee3\u8868\u7684\u65b9\u6cd5\u6709 AdaBoost\u3001Gradient Boosting\uff0c\u5169\u7a2e\u90fd\u662f\u7522\u751f\u975e\u5e38\u591a\u68f5\u7684\u6a39\uff0c\u4f46\u662f\u6bcf\u4e00\u68f5\u90fd\u662f\u5f88\u7c21\u55ae\u7684\u6c7a\u7b56\u6a39\u3002Boosting \u76ee\u6a19\u662f\u5e0c\u671b\u65b0\u7684\u6a39\u53ef\u4ee5\u91dd\u5c0d\u820a\u7684\u6a39\u9810\u6e2c\u4e0d\u592a\u597d\u7684\u90e8\u5206\u505a\u4e00\u4e9b\u88dc\u5f37\u3002\u6240\u4ee5\u6700\u7d42\u6211\u5011\u8981\u628a\u9019\u9ebc\u591a\u7c21\u55ae\u7684\u6a39\u5408\u5728\u4e00\u8d77\u624d\u80fd\u7576\u6700\u5f8c\u7684\u9810\u6e2c\u3002","title":"Boosting \u63a8\u5347\u6cd5"},{"location":"13.\u6574\u9ad4\u5b78\u7fd2/#stacking","text":"\u4e0b\u5716\u70ba\u6574\u9ad4\u5b78\u7fd2 Stacking \u67b6\u69cb\u3002Stacking \u9996\u5148\u7522\u751f\u51fa m \u500b\u6a21\u578b\uff0c\u5f7c\u6b64\u9593\u4e26\u4e92\u76f8\u7121\u95dc\u9023\uff0c\u4f8b\u5982\u7b2c\u4e00\u500b\u6a21\u578b\u70ba logistic regression \u7b2c\u4e8c\u500b\u70ba\u6c7a\u7b56\u6a39\u3002\u8a13\u7df4\u5b8c m \u500b\u6a21\u578b\u5f8c\uff0c\u6211\u5011\u8981\u628a\u9019 m \u500b\u6a21\u578b\u5408\u4f75\u5728\u4e00\u8d77\u3002\u5408\u4f75\u7684\u65b9\u5f0f\u662f\u6211\u5011\u518d\u53e6\u5916\u8a13\u7df4\u4e00\u500b\u6a21\u578b\uff0c\u9019\u500b\u6a21\u578b\u628am\u500b\u6a21\u578b\u7684\u8f38\u51fa\u7576\u6210\u65b0\u7684\u6a21\u578b\u7684\u8f38\u5165\u56e0\u6b64\u6211\u5011\u6703\u6839\u64da\u9019m\u500b\u7279\u5fb5\u5229\u7528\u6574\u9ad4\u5b78\u7fd2\u5176\u4e2d\u7684\u6f14\u7b97\u6cd5\u4f86\u5b78\u7fd2\u4e00\u500b\u6a21\u578b\u4e26\u9810\u6e2c\u6700\u7d42\u7d50\u679c\u3002","title":"Stacking 
\u5806\u758a\u6cd5"},{"location":"13.\u6574\u9ad4\u5b78\u7fd2/#patch-learning","text":"\u5340\u57df\u5b78\u7fd2 ( Patch Learning , PL) \u65bc 2019 \u5e74\u7531\u7f8e\u570b\u5357\u52a0\u5dde\u5927\u5b78 Jerry M. Mendel \u8207 Dongrui Wu \u6240\u63d0\u51fa\u7684\u4e00\u7a2e\u6a5f\u5668\u5b78\u7fd2\u65b9\u6cd5\u3002\u6240\u8b02\u7684\u5340\u57df\u5b78\u7fd2\u662f\u80fd\u5920\u6709\u6548\u7684\u638c\u63e1\u8868\u73fe\u4e0d\u597d\u7684\u5340\u9593\uff0c\u7d93\u904e\u8a13\u7df4\u4e00\u500b\u5168\u57df\u7684\u6a21\u578b\u5f8c\u4e26\u4efb\u4e00\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u627e\u51fa\u9019\u4e9b\u8aa4\u5dee\u5927\u7684 Patch\uff0c\u900f\u904e\u591a\u500b\u65b7\u9ede\u7684\u5b78\u7fd2\u6211\u5011\u6703\u5f97\u5230 Global Model\u3001Patch1 Model\u3001Patch2 Model\u2026Patch(n) Model\u3002\u7136\u800c\u5728\u6a5f\u5668\u5b78\u7fd2\u4e2d\u6211\u5011\u6709\u5f88\u591a\u7a2e\u65b9\u6cd5\u53ef\u4ee5\u6539\u5584\u6211\u5011\u7684\u6a21\u578b\uff0c\u4f8b\u5982\u52a0\u6df1\u548c\u52a0\u5bec\u795e\u7d93\u7db2\u8def\u6216\u662f\u6dfb\u52a0\u4e00\u4e9b\u975e\u7dda\u6027\u7684\u6fc0\u52f5\u51fd\u6578\u4f86\u6700\u4f73\u5316\u6211\u5011\u7684\u6a21\u578b\u3002\u6216\u662f\u4f7f\u7528\u6574\u9ad4\u5b78\u7fd2\u7684\u65b9\u6cd5\u96c6\u5408\u8a31\u591a\u7b56\u7565\uff0c\u6700\u7d42\u5f62\u6210\u4e00\u500b\u5f37\u5b78\u7fd2\u5668\u4e26\u6539\u5584\u67d0\u4e9b\u5340\u57df\u7684\u5f31\u9ede\u3002\u4e0b\u5716\u70ba\u4e00\u500b\u7c21\u55ae\u7684\u5340\u57df\u5b78\u7fd2\u9810\u6e2c\u6d41\u7a0b\u5716\u3002\u5728\u4f7f\u7528\u5340\u57df\u5b78\u7fd2\u6a21\u578b\u524d\u6211\u5011\u8981\u627e\u51fa\u8a72\u6bb5\u8f38\u5165\u6240\u5c0d\u61c9\u7684 Patch\uff0c\u82e5\u8a72\u5340\u9593\u525b\u597d\u843d\u65bc\u6240\u5283\u5b9a\u7684\u7bc4\u570d\u5167\uff0c\u9019\u4e9b\u8f38\u5165\u5c31\u5c07\u6703\u5c0d\u61c9\u5230\u76f8\u5c0d\u61c9\u7684\u5340\u57df\u5b78\u7fd2\uff0c\u5426\u5247\u5c31\u6703\u4f7f\u7528\u5168\u57df\u6a21\u578b\u9032\u884c\u9810\u6e2c\u3002 
\u5340\u57df\u5b78\u7fd2\u6a5f\u5236\u5305\u62ec\u4e09\u500b\u90e8\u5206\uff0c\u7c21\u8981\u8aaa\u660e\u5982\u4e0b: (1)\u4f7f\u7528\u6240\u6709\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u4e00\u500b\u5168\u57df\u6a21\u578b; (2)\u6311\u51fa\u5f71\u97ff\u932f\u8aa4\u7387\u8f03\u9ad8\u7684\u8cc7\u6599\uff0c\u518d\u653e\u5165\u500b\u5225\u7684 Patch \u6a21\u578b\u9032\u884c\u5340\u57df\u8a13\u7df4; (3)\u81ea\u8a13\u7df4\u8cc7\u6599\u4e2d\u53bb\u6389\u5df2\u7d93\u88ab\u5c40\u90e8\u6a21\u578b\u7528\u904e\u7684\u8cc7\u6599\uff0c\u518d\u4f7f\u7528\u5269\u4e0b\u7684\u6240\u6709\u8cc7\u6599\u66f4\u65b0\u5168\u57df\u6a21\u578b\u3002\u7576\u8f38\u5165\u8cc7\u6599\u9032\u4f86\u6642\uff0c\u9996\u5148\u5224\u65b7\u9019\u500b\u8f38\u5165\u662f\u4e0d\u662f\u5728\u525b\u525b\u8a18\u4e0b\u7684 Patch \u6a21\u578b\u4e2d\uff0c\u5982\u679c\u662f\u7684\u8a71\uff0c\u5c31\u57f7\u884c Patch \u6a21\u578b\u3002\u5982\u679c\u4e0d\u662f\u7684\u8a71\uff0c\u57f7\u884c\u66f4\u65b0\u5f8c\u7684\u5168\u57df\u6a21\u578b\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u5340\u57df\u5b78\u7fd2 (Patch Learning)"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/","text":"[Day 14] \u591a\u68f5\u6c7a\u7b56\u6a39\u66f4\u53b2\u5bb3\uff1a\u96a8\u6a5f\u68ee\u6797 (Random forest) \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u96a8\u6a5f\u68ee\u6797\u4ecb\u7d39 \u96a8\u6a5f\u68ee\u6797\u7684\u6a39\u662f\u5982\u4f55\u751f\u6210\uff1f\u96a8\u6a5f\u68ee\u6797\u7684\u512a\u9ede\uff1f \u96a8\u6a5f\u68ee\u6797\u5982\u4f55\u8655\u7406\u5206\u985e\u554f\u984c\uff1f \u96a8\u6a5f\u68ee\u6797\u5982\u4f55\u8655\u7406\u8ff4\u6b78\u554f\u984c\uff1f \u5be6\u4f5c\u96a8\u6a5f\u68ee\u6797\u5206\u985e\u5668 \u6bd4\u8f03\u96a8\u6a5f\u68ee\u6797\u8207\u6c7a\u7b56\u6a39\u5169\u8005\u5dee\u5225\u3002 \u7bc4\u4f8b\u7a0b\u5f0f \u96a8\u6a5f\u68ee\u6797(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f \u96a8\u6a5f\u68ee\u6797(Regression)\uff1a 
\u96a8\u6a5f\u68ee\u6797 \u96a8\u6a5f\u68ee\u6797\u5176\u5be6\u5c31\u662f\u9032\u968e\u7248\u7684\u6c7a\u7b56\u6a39\uff0c\u6240\u8b02\u7684\u68ee\u6797\u5c31\u662f\u7531\u5f88\u591a\u68f5\u6c7a\u7b56\u6a39\u6240\u7d44\u6210\u3002\u96a8\u6a5f\u68ee\u6797\u662f\u4f7f\u7528 Bagging \u52a0\u4e0a\u96a8\u6a5f\u7279\u5fb5\u63a1\u6a23\u7684\u65b9\u6cd5\u6240\u7522\u751f\u51fa\u4f86\u7684\u6574\u9ad4\u5b78\u7fd2\u6f14\u7b97\u6cd5\u3002\u9084\u8a18\u5f97\u5728\u524d\u5e7e\u5929\u7684\u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u4e2d\uff0c\u7576\u6a21\u578b\u7684\u6a39\u6700\u5927\u6df1\u5ea6\u8a2d\u5b9a\u592a\u5927\u7684\u8a71\u5bb9\u6613\u8b93\u6a21\u578b\u904e\u64ec\u5408\u3002\u56e0\u6b64\u96a8\u6a5f\u68ee\u6797\u85c9\u7531\u591a\u68f5\u4e0d\u540c\u6a39\u7684\u6982\u5ff5\u6240\u7d44\u6210\uff0c\u8b93\u7d50\u679c\u6bd4\u8f03\u4e0d\u5bb9\u6613\u904e\u5ea6\u64ec\u5408\uff0c\u4e26\u4f7f\u5f97\u9810\u6e2c\u80fd\u529b\u66f4\u63d0\u5347\u3002 \u96a8\u6a5f\u68ee\u6797\u7684\u751f\u6210\u65b9\u6cd5 \u9996\u5148\u5f9e\u8a13\u7df4\u96c6\u4e2d\u62bd\u53d6 n\u2019 \u7b46\u8cc7\u6599\u51fa\u4f86\uff0c\u7136\u800c\u9019 n\u2019 \u7b46\u8cc7\u6599\u662f\u53ef\u4ee5\u88ab\u91cd\u8907\u62bd\u53d6\u7684\u3002\u5047\u8a2d\u6211\u5011\u6709\u4e00\u5343\u7b46\u8cc7\u6599\u6211\u5011\u8981\u5f9e\u4e2d\u62bd\u53d6 100 \u7b46\u8cc7\u6599\u51fa\u4f86\uff0c\u9019 100 \u7b46\u8cc7\u6599\u88e1\u9762\u53ef\u80fd\u6703\u6709\u91cd\u8907\u7684\u6578\u64da\u3002\u63a5\u8457\u7b2c\u4e8c\u6b65\u5f9e\u9019\u4e9b\u62bd\u53d6\u51fa\u4f86\u7684\u8cc7\u6599\u4e2d\u6311\u9078 k \u500b\u7279\u5fb5\u7576\u4f5c\u6c7a\u7b56\u56e0\u5b50\u7684\u5019\u9078\uff0c\u56e0\u6b64\u6bcf\u4e00\u68f5\u6a39\u53ea\u80fd\u770b\u898b\u90e8\u5206\u7684\u7279\u5fb5\u3002\u7b2c\u4e09\u6b65\u91cd\u8907\u4ee5\u4e0a\u6b65\u9a5f m \u6b21\u4e26\u7522\u751f m \u68f5\u6c7a\u7b56\u6a39\u3002\u900f\u904e Bootstrap \u6b65\u9a5f\u91cd\u8907 m \u6b21\uff0c\u505a\u5b8c\u4e4b\u5f8c\u6211\u5011\u6703\u6709 m 
\u7d44\u7684\u8a13\u7df4\u8cc7\u6599\uff0c\u6bcf\u4e00\u7d44\u8a13\u7df4\u8cc7\u6599\u5167\u90fd\u6709 n\u2019 \u7b46\u8cc7\u6599\u3002\u6700\u5f8c\u518d\u900f\u904e\u6bcf\u68f5\u6a39\u7684\u6c7a\u7b56\u4e26\u63a1\u591a\u6578\u6c7a\u6295\u7968\u7684\u65b9\u5f0f\uff0c\u6c7a\u5b9a\u6700\u7d42\u9810\u6e2c\u7684\u985e\u5225\u3002\u56e0\u70ba\u96a8\u6a5f\u68ee\u6797\u6bcf\u4e00\u68f5\u6a39\u7684\u7279\u5fb5\u6578\u91cf\u53ef\u80fd\u90fd\u4e0d\u540c\uff0c\u6240\u4ee5\u6700\u5f8c\u6c7a\u7b56\u51fa\u4f86\u7684\u7d50\u679c\u90fd\u6703\u4e0d\u4e00\u6a23\u3002\u6700\u5f8c\u518d\u6839\u64da\u4efb\u52d9\u7684\u4e0d\u540c\u4f86\u505a\u8ff4\u6b78\u6216\u662f\u5206\u985e\u7684\u554f\u984c\uff0c\u5982\u679c\u662f\u8ff4\u6b78\u554f\u984c\u6211\u5011\u5c31\u5c07\u9019\u4e9b\u6c7a\u7b56\u6578\u7684\u8f38\u51fa\u505a\u5e73\u5747\u5f97\u5230\u6700\u5f8c\u7b54\u6848\uff0c\u82e5\u662f\u5206\u985e\u554f\u984c\u6211\u5011\u5247\u7528\u6295\u6a19\u63a1\u591a\u6578\u6c7a\u7684\u65b9\u5f0f\u4f86\u6574\u5408\u6240\u6709\u6a39\u9810\u6e2c\u7684\u7d50\u679c\u3002 \u5f9e\u8a13\u7df4\u96c6\u4e2d\u62bd\u53d6 n\u2019 \u7b46\u8cc7\u6599\u51fa\u4f86 n\u2019 \u7b46\u8cc7\u6599\u96a8\u6a5f\u6311\u9078 k \u500b\u7279\u5fb5\u505a\u6a23\u672c \u91cd\u8907 m \u6b21\uff0c\u7522\u751f m \u68f5\u6c7a\u7b56\u6a39 \u5206\u985e: \u591a\u6578\u6295\u7968\u6a5f\u5236\u9032\u884c\u9810\u6e2c\u3001\u8ff4\u6b78: \u5e73\u5747\u6a5f\u5236\u9032\u884c\u9810\u6e2c \u96a8\u6a5f\u68ee\u6797\u4e2d\u7684\u96a8\u6a5f\uff1f \u96a8\u6a5f\u68ee\u6797\u4e2d\u7684\u96a8\u6a5f\u6709\u5169\u7a2e\u65b9\u9762\u53ef\u4ee5\u89e3\u91cb\u3002\u9996\u5148\u7b2c\u4e00\u500b\u662f\u96a8\u6a5f\u53d6\u6a23\uff0c\u5728\u6a21\u578b\u8a13\u7df4\u7684\u904e\u7a0b\u4e2d\u6bcf\u68f5\u6a39\u7684\u751f\u6210\u90fd\u6703\u5148\u5f9e\u8a13\u7df4\u96c6\u4e2d\u96a8\u6a5f\u62bd\u53d6 n\u2019 \u7b46\u8cc7\u6599\u51fa\u4f86\uff0c\u800c\u9019 n\u2019 
\u7b46\u8cc7\u6599\u662f\u53ef\u4ee5\u88ab\u91cd\u8907\u62bd\u53d6\u7684\u3002\u6b64\u62bd\u53d6\u8cc7\u6599\u7684\u65b9\u5f0f\u53c8\u7a31\u70ba Bootstrap\uff0c\u5b83\u662f\u4e00\u7a2e\u5728\u7d71\u8a08\u5b78\u4e0a\u5e38\u7528\u7684\u8cc7\u6599\u4f30\u8a08\u65b9\u6cd5\u3002\u7b2c\u4e8c\u500b\u89e3\u91cb\u96a8\u6a5f\u7684\u7406\u7531\u662f\u5728\u96a8\u6a5f\u68ee\u6797\u4e2d\u6bcf\u4e00\u68f5\u6a39\u90fd\u662f\u96a8\u6a5f\u7684\u7279\u5fb5\u9078\u53d6\u3002\u6bcf\u4e00\u68f5\u6a39\u90fd\u662f\u5f9e n\u2019 \u7b46\u8cc7\u6599\u4e2d\u96a8\u6a5f\u6311\u9078 k \u500b\u7279\u5fb5\u505a\u6a23\u672c\u3002 \u5728 sklearn \u4e2d\uff0c\u6700\u591a\u96a8\u6a5f\u9078\u53d6 \ud835\udc59\ud835\udc5c\ud835\udc54 2 \ud835\udc41 \u500b\u7279\u5fb5 \u96a8\u6a5f\u68ee\u6797\u7684\u512a\u9ede \u6bcf\u68f5\u6a39\u6703\u7528\u5230\u54ea\u4e9b\u8a13\u7df4\u8cc7\u6599\u53ca\u7279\u5fb5\u90fd\u662f\u7531\u96a8\u6a5f\u6c7a\u5b9a \u63a1\u7528\u591a\u500b\u6c7a\u7b56\u6a39\u7684\u6295\u7968\u6a5f\u5236\u4f86\u6539\u5584\u6c7a\u7b56\u6a39 \u8207\u6c7a\u7b56\u6a39\u76f8\u6bd4\uff0c\u4e0d\u5bb9\u6613\u904e\u5ea6\u64ec\u5408 \u96a8\u6a5f\u68ee\u6797\u6bcf\u4e00\u68f5\u6a39\u90fd\u662f\u7368\u7acb\u7684 \u8a13\u7df4\u6216\u662f\u9810\u6e2c\u7684\u968e\u6bb5\u6bcf\u4e00\u68f5\u6a39\u90fd\u80fd\u5e73\u884c\u5316\u7684\u904b\u884c [\u7a0b\u5f0f\u5be6\u4f5c] \u96a8\u6a5f\u68ee\u6797(\u5206\u985e\u5668) Parameters: - n_estimators: \u68ee\u6797\u4e2d\u6a39\u6728\u7684\u6578\u91cf\uff0c\u9810\u8a2d=100\u3002 - max_features: \u5283\u5206\u6642\u8003\u616e\u7684\u6700\u5927\u7279\u5fb5\u6578\uff0c\u9810\u8a2dauto\u3002 - criterion: \u4e82\u5ea6\u7684\u8a55\u4f30\u6a19\u6e96\uff0cgini/entropy\u3002\u9810\u8a2d\u70bagini\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\u3002 - splitter: \u7279\u5fb5\u5283\u5206\u9ede\u9078\u64c7\u6a19\u6e96\uff0cbest/random\u3002\u9810\u8a2d\u70babest\u3002 - random_state: 
\u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\uff0csplitter=random \u624d\u6709\u7528\u3002 - min_samples_split: \u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u518d\u5206 - min_samples_leaf: \u5206\u5b8c\u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u5206 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 - get_depth: \u53d6\u5f97\u6a39\u7684\u6df1\u5ea6\u3002 from sklearn.ensemble import RandomForestClassifier # \u5efa\u7acb Random Forest Classifier \u6a21\u578b randomForestModel = RandomForestClassifier ( n_estimators = 100 , criterion = 'gini' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b randomForestModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = randomForestModel . predict ( X_train ) \u4f7f\u7528Score\u8a55\u4f30\u6a21\u578b \u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb score() \u76f4\u63a5\u8a08\u7b97\u6a21\u578b\u9810\u6e2c\u7684\u6e96\u78ba\u7387\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( '\u8a13\u7df4\u96c6: ' , randomForestModel . score ( X_train , y_train )) print ( '\u6e2c\u8a66\u96c6: ' , randomForestModel . 
score ( X_test , y_test )) \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6: 1.0 \u6e2c\u8a66\u96c6: 0.8888888888888888 \u6211\u5011\u53ef\u4ee5\u67e5\u770b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5728\u6e2c\u8a66\u96c6\u4e0a\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u4e0b\u5716\u4e2d\u5de6\u908a\u7684\u662f\u6e2c\u8a66\u96c6\u7684\u771f\u5be6\u5206\u985e\uff0c\u53f3\u908a\u7684\u662f\u6a21\u578b\u9810\u6e2c\u7684\u5206\u985e\u7d50\u679c\u3002\u7531\u65bc\u8a13\u7df4\u8cc7\u6599\u7b46\u6578\u4e0d\u591a\uff0c\u56e0\u6b64\u6a21\u578b\u8a13\u7df4\u5bb9\u6613\u904e\u5ea6\u64ec\u5408\u8a13\u7df4\u96c6\u7684\u5206\u5e03\u3002\u6700\u7d42\u5728\u6e2c\u8a66\u53ca\u9810\u6e2c\u7684\u8868\u73fe\u4e0a\u50c5\u6709 0.88 \u7684\u6e96\u78ba\u7387\u3002 \u7279\u5fb5\u91cd\u8981\u7a0b\u5ea6 \u53ea\u8981\u662f\u6c7a\u7b56\u6a39\u7cfb\u5217\u6f14\u7b97\u6cd5\uff0c\u4e0d\u7ba1\u662f\u5206\u985e\u5668\u6216\u662f\u8ff4\u6b78\u5668\u90fd\u80fd\u900f\u904e feature_importances_ \u4f86\u6aa2\u8996\u6a21\u578b\u9810\u6e2c\u5c0d\u65bc\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 print ( '\u7279\u5fb5\u91cd\u8981\u7a0b\u5ea6: ' , randomForestModel . 
feature_importances_ ) \u8f38\u51fa\u7d50\u679c\uff1a \u7279\u5fb5\u91cd\u8981\u7a0b\u5ea6: [0.09864249 0.01363871 0.44211602 0.44560278] \u96a8\u6a5f\u68ee\u6797(\u8ff4\u6b78\u5668) Parameters: - n_estimators: \u68ee\u6797\u4e2d\u6a39\u6728\u7684\u6578\u91cf\uff0c\u9810\u8a2d=100\u3002 - max_features: \u5283\u5206\u6642\u8003\u616e\u7684\u6700\u5927\u7279\u5fb5\u6578\uff0c\u9810\u8a2dauto\u3002 - criterion: \u8a55\u4f30\u5207\u5272\u9ede\u6307\u6a19\uff0cmse/mae\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\u3002 - splitter: \u7279\u5fb5\u5283\u5206\u9ede\u9078\u64c7\u6a19\u6e96\uff0cbest/random\u3002\u9810\u8a2d\u70babest\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\uff0csplitter=random \u624d\u6709\u7528\u3002 - min_samples_split: \u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u518d\u5206 - min_samples_leaf: \u5206\u5b8c\u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u5206 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - get_depth: \u53d6\u5f97\u6a39\u7684\u6df1\u5ea6\u3002 from sklearn.ensemble import RandomForestRegressor # \u5efa\u7acbRandomForestRegressor\u6a21\u578b randomForestModel = RandomForestRegressor ( n_estimators = 100 , criterion = 'mse' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b randomForestModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = randomForestModel . 
predict ( x ) \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 14] \u591a\u68f5\u6c7a\u7b56\u6a39\u66f4\u53b2\u5bb3\uff1a\u96a8\u6a5f\u68ee\u6797 (Random forest)"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#day-14-random-forest","text":"","title":"[Day 14] \u591a\u68f5\u6c7a\u7b56\u6a39\u66f4\u53b2\u5bb3\uff1a\u96a8\u6a5f\u68ee\u6797 (Random forest)"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#_1","text":"\u96a8\u6a5f\u68ee\u6797\u4ecb\u7d39 \u96a8\u6a5f\u68ee\u6797\u7684\u6a39\u662f\u5982\u4f55\u751f\u6210\uff1f\u96a8\u6a5f\u68ee\u6797\u7684\u512a\u9ede\uff1f \u96a8\u6a5f\u68ee\u6797\u5982\u4f55\u8655\u7406\u5206\u985e\u554f\u984c\uff1f \u96a8\u6a5f\u68ee\u6797\u5982\u4f55\u8655\u7406\u8ff4\u6b78\u554f\u984c\uff1f \u5be6\u4f5c\u96a8\u6a5f\u68ee\u6797\u5206\u985e\u5668 \u6bd4\u8f03\u96a8\u6a5f\u68ee\u6797\u8207\u6c7a\u7b56\u6a39\u5169\u8005\u5dee\u5225\u3002 \u7bc4\u4f8b\u7a0b\u5f0f \u96a8\u6a5f\u68ee\u6797(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f \u96a8\u6a5f\u68ee\u6797(Regression)\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#_2","text":"\u96a8\u6a5f\u68ee\u6797\u5176\u5be6\u5c31\u662f\u9032\u968e\u7248\u7684\u6c7a\u7b56\u6a39\uff0c\u6240\u8b02\u7684\u68ee\u6797\u5c31\u662f\u7531\u5f88\u591a\u68f5\u6c7a\u7b56\u6a39\u6240\u7d44\u6210\u3002\u96a8\u6a5f\u68ee\u6797\u662f\u4f7f\u7528 Bagging 
\u52a0\u4e0a\u96a8\u6a5f\u7279\u5fb5\u63a1\u6a23\u7684\u65b9\u6cd5\u6240\u7522\u751f\u51fa\u4f86\u7684\u6574\u9ad4\u5b78\u7fd2\u6f14\u7b97\u6cd5\u3002\u9084\u8a18\u5f97\u5728\u524d\u5e7e\u5929\u7684\u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u4e2d\uff0c\u7576\u6a21\u578b\u7684\u6a39\u6700\u5927\u6df1\u5ea6\u8a2d\u5b9a\u592a\u5927\u7684\u8a71\u5bb9\u6613\u8b93\u6a21\u578b\u904e\u64ec\u5408\u3002\u56e0\u6b64\u96a8\u6a5f\u68ee\u6797\u85c9\u7531\u591a\u68f5\u4e0d\u540c\u6a39\u7684\u6982\u5ff5\u6240\u7d44\u6210\uff0c\u8b93\u7d50\u679c\u6bd4\u8f03\u4e0d\u5bb9\u6613\u904e\u5ea6\u64ec\u5408\uff0c\u4e26\u4f7f\u5f97\u9810\u6e2c\u80fd\u529b\u66f4\u63d0\u5347\u3002","title":"\u96a8\u6a5f\u68ee\u6797"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#_3","text":"\u9996\u5148\u5f9e\u8a13\u7df4\u96c6\u4e2d\u62bd\u53d6 n\u2019 \u7b46\u8cc7\u6599\u51fa\u4f86\uff0c\u7136\u800c\u9019 n\u2019 \u7b46\u8cc7\u6599\u662f\u53ef\u4ee5\u88ab\u91cd\u8907\u62bd\u53d6\u7684\u3002\u5047\u8a2d\u6211\u5011\u6709\u4e00\u5343\u7b46\u8cc7\u6599\u6211\u5011\u8981\u5f9e\u4e2d\u62bd\u53d6 100 \u7b46\u8cc7\u6599\u51fa\u4f86\uff0c\u9019 100 \u7b46\u8cc7\u6599\u88e1\u9762\u53ef\u80fd\u6703\u6709\u91cd\u8907\u7684\u6578\u64da\u3002\u63a5\u8457\u7b2c\u4e8c\u6b65\u5f9e\u9019\u4e9b\u62bd\u53d6\u51fa\u4f86\u7684\u8cc7\u6599\u4e2d\u6311\u9078 k \u500b\u7279\u5fb5\u7576\u4f5c\u6c7a\u7b56\u56e0\u5b50\u7684\u5f8c\u9078\uff0c\u56e0\u6b64\u6bcf\u4e00\u68f5\u6a39\u53ea\u80fd\u770b\u898b\u90e8\u5206\u7684\u7279\u5fb5\u3002\u7b2c\u4e09\u6b65\u91cd\u8907\u4ee5\u4e0a\u6b65\u9a5f m \u6b21\u4e26\u7522\u751f m \u68f5\u6c7a\u7b56\u6a39\u3002\u900f\u904e Bootstrap \u6b65\u9a5f\u91cd\u8907 m \u6b21\uff0c\u505a\u5b8c\u4e4b\u5f8c\u6211\u5011\u6703\u6709 m \u7d44\u7684\u8a13\u7df4\u8cc7\u6599\uff0c\u6bcf\u4e00\u7d44\u8a13\u7df4\u8cc7\u6599\u5167\u90fd\u6709 n\u2019 
\u7b46\u8cc7\u6599\u3002\u6700\u5f8c\u518d\u900f\u904e\u6bcf\u68f5\u6a39\u7684\u6c7a\u7b56\u4e26\u63a1\u591a\u6578\u6c7a\u6295\u7968\u7684\u65b9\u5f0f\uff0c\u6c7a\u5b9a\u6700\u7d42\u9810\u6e2c\u7684\u985e\u5225\u3002\u56e0\u70ba\u96a8\u6a5f\u68ee\u6797\u6bcf\u4e00\u68f5\u6a39\u7684\u7279\u5fb5\u6578\u91cf\u53ef\u80fd\u90fd\u4e0d\u540c\uff0c\u6240\u4ee5\u6700\u5f8c\u6c7a\u7b56\u51fa\u4f86\u7684\u7d50\u679c\u90fd\u6703\u4e0d\u4e00\u6a23\u3002\u6700\u5f8c\u518d\u6839\u64da\u4efb\u52d9\u7684\u4e0d\u540c\u4f86\u505a\u8ff4\u6b78\u6216\u662f\u5206\u985e\u7684\u554f\u984c\uff0c\u5982\u679c\u662f\u8ff4\u6b78\u554f\u984c\u6211\u5011\u5c31\u5c07\u9019\u4e9b\u6c7a\u7b56\u6578\u7684\u8f38\u51fa\u505a\u5e73\u5747\u5f97\u5230\u6700\u5f8c\u7b54\u6848\uff0c\u82e5\u662f\u5206\u985e\u554f\u984c\u6211\u5011\u5247\u7528\u6295\u6a19\u63a1\u591a\u6578\u6c7a\u7684\u65b9\u5f0f\u4f86\u6574\u5408\u6240\u6709\u6a39\u9810\u6e2c\u7684\u7d50\u679c\u3002 \u5f9e\u8a13\u7df4\u96c6\u4e2d\u62bd\u53d6 n\u2019 \u7b46\u8cc7\u6599\u51fa\u4f86 n\u2019 \u7b46\u8cc7\u6599\u96a8\u6a5f\u6311\u9078 k \u500b\u7279\u5fb5\u505a\u6a23\u672c \u91cd\u8907 m \u6b21\uff0c\u7522\u751f m \u68f5\u6c7a\u7b56\u6a39 \u5206\u985e: \u591a\u6578\u6295\u7968\u6a5f\u5236\u9032\u884c\u9810\u6e2c\u3001\u8ff4\u6b78: \u5e73\u5747\u6a5f\u5236\u9032\u884c\u9810\u6e2c","title":"\u96a8\u6a5f\u68ee\u6797\u7684\u751f\u6210\u65b9\u6cd5"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#_4","text":"\u96a8\u6a5f\u68ee\u6797\u4e2d\u7684\u96a8\u6a5f\u6709\u5169\u7a2e\u65b9\u9762\u53ef\u4ee5\u89e3\u91cb\u3002\u9996\u5148\u7b2c\u4e00\u500b\u662f\u96a8\u6a5f\u53d6\u6a23\uff0c\u5728\u6a21\u578b\u8a13\u7df4\u7684\u904e\u7a0b\u4e2d\u6bcf\u68f5\u6a39\u7684\u751f\u6210\u90fd\u6703\u5148\u5f9e\u8a13\u7df4\u96c6\u4e2d\u96a8\u6a5f\u62bd\u53d6 n\u2019 \u7b46\u8cc7\u6599\u51fa\u4f86\uff0c\u800c\u9019 n\u2019 \u7b46\u8cc7\u6599\u662f\u53ef\u4ee5\u88ab\u91cd\u8907\u62bd\u53d6\u7684\u3002\u6b64\u62bd\u53d6\u8cc7\u6599\u7684\u65b9\u5f0f\u53c8\u7a31\u70ba 
Bootstrap\uff0c\u5b83\u662f\u4e00\u7a2e\u5728\u7d71\u8a08\u5b78\u4e0a\u5e38\u7528\u7684\u8cc7\u6599\u4f30\u8a08\u65b9\u6cd5\u3002\u7b2c\u4e8c\u500b\u89e3\u91cb\u96a8\u6a5f\u7684\u7406\u7531\u662f\u5728\u96a8\u6a5f\u68ee\u6797\u4e2d\u6bcf\u4e00\u68f5\u6a39\u90fd\u662f\u96a8\u6a5f\u7684\u7279\u5fb5\u9078\u53d6\u3002\u6bcf\u4e00\u68f5\u6a39\u90fd\u662f\u5f9e n\u2019 \u7b46\u8cc7\u6599\u4e2d\u96a8\u6a5f\u6311\u9078 k \u500b\u7279\u5fb5\u505a\u6a23\u672c\u3002 \u5728 sklearn \u4e2d\uff0c\u6700\u591a\u96a8\u6a5f\u9078\u53d6 \ud835\udc59\ud835\udc5c\ud835\udc54 2 \ud835\udc41 \u500b\u7279\u5fb5","title":"\u96a8\u6a5f\u68ee\u6797\u4e2d\u7684\u96a8\u6a5f\uff1f"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#_5","text":"\u6bcf\u68f5\u6a39\u6703\u7528\u5230\u54ea\u4e9b\u8a13\u7df4\u8cc7\u6599\u53ca\u7279\u5fb5\u90fd\u662f\u7531\u96a8\u6a5f\u6c7a\u5b9a \u63a1\u7528\u591a\u500b\u6c7a\u7b56\u6a39\u7684\u6295\u7968\u6a5f\u5236\u4f86\u6539\u5584\u6c7a\u7b56\u6a39 \u8207\u6c7a\u7b56\u6a39\u76f8\u6bd4\uff0c\u4e0d\u5bb9\u6613\u904e\u5ea6\u64ec\u5408 \u96a8\u6a5f\u68ee\u6797\u6bcf\u4e00\u68f5\u6a39\u90fd\u662f\u7368\u7acb\u7684 \u8a13\u7df4\u6216\u662f\u9810\u6e2c\u7684\u968e\u6bb5\u6bcf\u4e00\u68f5\u6a39\u90fd\u80fd\u5e73\u884c\u5316\u7684\u904b\u884c","title":"\u96a8\u6a5f\u68ee\u6797\u7684\u512a\u9ede"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#_6","text":"","title":"[\u7a0b\u5f0f\u5be6\u4f5c]"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#_7","text":"Parameters: - n_estimators: \u68ee\u6797\u4e2d\u6a39\u6728\u7684\u6578\u91cf\uff0c\u9810\u8a2d=100\u3002 - max_features: \u5283\u5206\u6642\u8003\u616e\u7684\u6700\u5927\u7279\u5fb5\u6578\uff0c\u9810\u8a2dauto\u3002 - criterion: \u4e82\u5ea6\u7684\u8a55\u4f30\u6a19\u6e96\uff0cgini/entropy\u3002\u9810\u8a2d\u70bagini\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\u3002 - splitter: \u7279\u5fb5\u5283\u5206\u9ede\u9078\u64c7\u6a19\u6e96\uff0cbest/random\u3002\u9810\u8a2d\u70babest\u3002 - random_state: 
\u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\uff0csplitter=random \u624d\u6709\u7528\u3002 - min_samples_split: \u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u518d\u5206 - min_samples_leaf: \u5206\u5b8c\u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u5206 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 - get_depth: \u53d6\u5f97\u6a39\u7684\u6df1\u5ea6\u3002 from sklearn.ensemble import RandomForestClassifier # \u5efa\u7acb Random Forest Classifier \u6a21\u578b randomForestModel = RandomForestClassifier ( n_estimators = 100 , criterion = 'gini' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b randomForestModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = randomForestModel . predict ( X_train )","title":"\u96a8\u6a5f\u68ee\u6797(\u5206\u985e\u5668)"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#score","text":"\u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb score() \u76f4\u63a5\u8a08\u7b97\u6a21\u578b\u9810\u6e2c\u7684\u6e96\u78ba\u7387\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( '\u8a13\u7df4\u96c6: ' , randomForestModel . score ( X_train , y_train )) print ( '\u6e2c\u8a66\u96c6: ' , randomForestModel . 
score ( X_test , y_test )) \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6: 1.0 \u6e2c\u8a66\u96c6: 0.8888888888888888 \u6211\u5011\u53ef\u4ee5\u67e5\u770b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5728\u6e2c\u8a66\u96c6\u4e0a\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u4e0b\u5716\u4e2d\u5de6\u908a\u7684\u662f\u6e2c\u8a66\u96c6\u7684\u771f\u5be6\u5206\u985e\uff0c\u53f3\u908a\u7684\u662f\u6a21\u578b\u9810\u6e2c\u7684\u5206\u985e\u7d50\u679c\u3002\u7531\u65bc\u8a13\u7df4\u8cc7\u6599\u7b46\u6578\u4e0d\u591a\uff0c\u56e0\u6b64\u6a21\u578b\u8a13\u7df4\u5bb9\u6613\u904e\u5ea6\u64ec\u5408\u8a13\u7df4\u96c6\u7684\u5206\u5e03\u3002\u6700\u7d42\u5728\u6e2c\u8a66\u53ca\u9810\u6e2c\u7684\u8868\u73fe\u4e0a\u50c5\u6709 0.88 \u7684\u6e96\u78ba\u7387\u3002","title":"\u4f7f\u7528Score\u8a55\u4f30\u6a21\u578b"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#_8","text":"\u53ea\u8981\u662f\u6c7a\u7b56\u6a39\u7cfb\u5217\u6f14\u7b97\u6cd5\uff0c\u4e0d\u7ba1\u662f\u5206\u985e\u5668\u6216\u662f\u8ff4\u6b78\u5668\u90fd\u80fd\u900f\u904e feature_importances_ \u4f86\u6aa2\u8996\u6a21\u578b\u9810\u6e2c\u5c0d\u65bc\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 print ( '\u7279\u5fb5\u91cd\u8981\u7a0b\u5ea6: ' , randomForestModel . 
feature_importances_ ) \u8f38\u51fa\u7d50\u679c\uff1a \u7279\u5fb5\u91cd\u8981\u7a0b\u5ea6: [0.09864249 0.01363871 0.44211602 0.44560278]","title":"\u7279\u5fb5\u91cd\u8981\u7a0b\u5ea6"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#_9","text":"Parameters: - n_estimators: \u68ee\u6797\u4e2d\u6a39\u6728\u7684\u6578\u91cf\uff0c\u9810\u8a2d=100\u3002 - max_features: \u5283\u5206\u6642\u8003\u616e\u7684\u6700\u5927\u7279\u5fb5\u6578\uff0c\u9810\u8a2dauto\u3002 - criterion: \u8a55\u4f30\u5207\u5272\u9ede\u6307\u6a19\uff0cmse/mae\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\u3002 - splitter: \u7279\u5fb5\u5283\u5206\u9ede\u9078\u64c7\u6a19\u6e96\uff0cbest/random\u3002\u9810\u8a2d\u70babest\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\uff0csplitter=random \u624d\u6709\u7528\u3002 - min_samples_split: \u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u518d\u5206 - min_samples_leaf: \u5206\u5b8c\u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u5206 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - get_depth: \u53d6\u5f97\u6a39\u7684\u6df1\u5ea6\u3002 from sklearn.ensemble import RandomForestRegressor # \u5efa\u7acbRandomForestRegressor\u6a21\u578b randomForestModel = RandomForestRegressor ( n_estimators = 100 , criterion = 'mse' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b randomForestModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = randomForestModel . 
predict ( x ) \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u96a8\u6a5f\u68ee\u6797(\u8ff4\u6b78\u5668)"},{"location":"15.XGBoost/","text":"[Day 15] \u6a5f\u5668\u5b78\u7fd2\u5e38\u52dd\u8ecd - XGBoost \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 XGBoost \u4ecb\u7d39 XGBoost \u662f\u4ec0\u9ebc\uff1f\u70ba\u4ec0\u9ebc\u5b83\u90a3\u9ebc\u5f37\u5927\uff1f XGBoost \u512a\u9ede \u6bd4\u8f03\u5169\u7a2e\u6574\u9ad4\u5b78\u7fd2\u67b6\u69cb\u5dee\u7570\uff1f Bagging vs. Boosting Boosting vs. Decision Tree Boosting \u65b9\u6cd5\u6709\u54ea\u4e9b \u5be6\u4f5c XGBoost \u5206\u985e\u5668\u8207\u8ff4\u6b78\u5668 \u6bd4\u8f03 Bagging \u8207 Boosting \u5169\u8005\u5dee\u5225 \u7bc4\u4f8b\u7a0b\u5f0f XGBoost(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f XGBoost(Regression)\uff1a \u4eba\u4eba\u9a5a\u5947\u7684 XGBoost XGboost \u5168\u540d\u70ba eXtreme Gradient Boosting\uff0c\u662f\u76ee\u524d Kaggle \u7af6\u8cfd\u4e2d\u6700\u5e38\u898b\u5230\u7684\u7b97\u6cd5\uff0c\u540c\u6642\u4e5f\u662f\u591a\u6578\u5f97\u734e\u8005\u6240\u4f7f\u7528\u7684\u6a21\u578b\u3002\u6b64\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u662f\u7531\u83ef\u76db\u9813\u5927\u5b78\u535a\u58eb\u751f\u9673\u5929\u5947\u6240\u63d0\u51fa\u4f86\u7684\uff0c\u5b83\u662f\u4ee5 Gradient Boosting \u70ba\u57fa\u790e\u4e0b\u53bb\u5be6\u4f5c\uff0c\u4e26\u6dfb\u52a0\u4e00\u4e9b\u65b0\u7684\u6280\u5de7\u3002\u5b83\u53ef\u4ee5\u8aaa\u662f\u7d50\u5408 Bagging \u548c Boosting \u7684\u512a\u9ede\u3002XGboost \u4fdd\u6709 Gradient Boosting \u7684\u505a\u6cd5\uff0c\u6bcf\u4e00\u68f5\u6a39\u662f\u4e92\u76f8\u95dc\u806f\u7684\uff0c\u76ee\u6a19\u662f\u5e0c\u671b\u5f8c\u9762\u751f\u6210\u7684\u6a39\u80fd\u5920\u4fee\u6b63\u524d\u9762\u4e00\u68f5\u6a39\u72af\u932f\u7684\u5730\u65b9\u3002\u6b64\u5916 XGboost 
\u662f\u63a1\u7528\u7279\u5fb5\u96a8\u6a5f\u63a1\u6a23\u7684\u6280\u5de7\uff0c\u548c\u96a8\u6a5f\u68ee\u6797\u4e00\u6a23\u5728\u751f\u6210\u6bcf\u4e00\u68f5\u6a39\u7684\u6642\u5019\u96a8\u6a5f\u62bd\u53d6\u7279\u5fb5\uff0c\u56e0\u6b64\u5728\u6bcf\u68f5\u6a39\u7684\u751f\u6210\u4e2d\u4e26\u4e0d\u6703\u6bcf\u4e00\u6b21\u90fd\u62ff\u5168\u90e8\u7684\u7279\u5fb5\u53c3\u8207\u6c7a\u7b56\u3002\u6b64\u5916\u70ba\u4e86\u8b93\u6a21\u578b\u904e\u65bc\u8907\u96dc\uff0cXGboost \u5728\u76ee\u6a19\u51fd\u6578\u6dfb\u52a0\u4e86\u6a19\u6e96\u5316\u3002\u56e0\u70ba\u6a21\u578b\u5728\u8a13\u7df4\u6642\u70ba\u4e86\u64ec\u5408\u8a13\u7df4\u8cc7\u6599\uff0c\u6703\u7522\u751f\u5f88\u591a\u9ad8\u6b21\u9805\u7684\u51fd\u6578\uff0c\u4f46\u53cd\u800c\u5bb9\u6613\u88ab\u96dc\u8a0a\u5e72\u64fe\u5c0e\u81f4\u904e\u5ea6\u64ec\u5408\u3002\u56e0\u6b64 L1/L2 Regularization \u76ee\u7684\u662f\u8b93\u640d\u5931\u51fd\u6578\u66f4\u4f73\u5e73\u6ed1\uff0c\u4e14\u6297\u96dc\u8a0a\u5e72\u64fe\u80fd\u529b\u66f4\u5927\u3002\u6700\u5f8c XGboost \u9084\u7528\u5230\u4e86\u4e00\u968e\u5c0e\u6578\u548c\u4e8c\u968e\u5c0e\u6578\u4f86\u751f\u6210\u4e0b\u4e00\u68f5\u6a39\u3002\u5176\u4e2d Gradient \u5c31\u662f\u6240\u8b02\u7684\u4e00\u968e\u5c0e\u6578\uff0c\u800c Hessian \u5373\u70ba\u4e8c\u968e\u5c0e\u6578\u3002 XGBoost \u512a\u9ede XGBoost \u9664\u4e86\u53ef\u4ee5\u505a\u5206\u985e\u4e5f\u80fd\u9032\u884c\u8ff4\u6b78\u9023\u7e8c\u6027\u6578\u503c\u7684\u9810\u6e2c\uff0c\u800c\u4e14\u6548\u679c\u901a\u5e38\u90fd\u4e0d\u5dee\u3002\u4e26\u900f\u904e Boosting \u6280\u5de7\u5c07\u8a31\u591a\u5f31\u6c7a\u7b56\u6a39\u96c6\u6210\u5728\u4e00\u8d77\u5f62\u6210\u4e00\u500b\u5f37\u7684\u9810\u6e2c\u6a21\u578b\u3002 \u5229\u7528\u4e86\u4e8c\u968e\u68af\u5ea6\u4f86\u5c0d\u7bc0\u9ede\u9032\u884c\u5283\u5206 \u5229\u7528\u5c40\u90e8\u8fd1\u4f3c\u7b97\u6cd5\u5c0d\u5206\u88c2\u7bc0\u9ede\u9032\u884c\u512a\u5316 \u5728\u640d\u5931\u51fd\u6578\u4e2d\u52a0\u5165\u4e86 L1/L2 \u9805\uff0c\u63a7\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6 
\u63d0\u4f9b GPU \u5e73\u884c\u5316\u904b\u7b97 Bagging vs. Boosting \u5728\u9019\u88e1\u5e6b\u5927\u5bb6\u56de\u9867\u4e00\u4e0b\u6574\u9ad4\u5b78\u7fd2\u4e2d\u7684 Bagging \u8207 Boosting \u5169\u8005\u9593\u7684\u5dee\u7570\u3002\u9996\u5148 Bagging \u900f\u904e\u96a8\u6a5f\u62bd\u6a23\u7684\u65b9\u5f0f\u751f\u6210\u6bcf\u4e00\u68f5\u6a39\uff0c\u6700\u91cd\u8981\u7684\u662f\u6bcf\u68f5\u6a39\u5f7c\u6b64\u7368\u7acb\u4e26\u7121\u95dc\u806f\u3002\u5148\u524d\u6240\u63d0\u5230\u7684\u96a8\u6a5f\u68ee\u6797\u5c31\u662f Bagging \u7684\u5be6\u4f8b\u3002\u53e6\u5916 Boosting \u5247\u662f\u900f\u904e\u5e8f\u5217\u7684\u65b9\u5f0f\u751f\u6210\u6a39\uff0c\u5f8c\u9762\u6240\u751f\u6210\u7684\u6a39\u6703\u8207\u524d\u4e00\u68f5\u6a39\u76f8\u95dc\u3002\u672c\u7ae0\u6240\u63d0\u53ca\u7684 XGBoost \u5c31\u662f Boosting \u65b9\u6cd5\u7684\u5176\u4e2d\u4e00\u7a2e\u5be6\u4f8b\u3002\u6b63\u662f\u6bcf\u68f5\u6a39\u7684\u751f\u6210\u90fd\u6539\u5584\u4e86\u4e0a\u4e00\u68f5\u6a39\u5b78\u7fd2\u4e0d\u597d\u7684\u5730\u65b9\uff0c\u56e0\u6b64 Boosting \u7684\u6a21\u578b\u901a\u5e38\u6703\u6bd4 Bagging \u9084\u4f86\u7684\u7cbe\u6e96\u3002 Bagging \u900f\u904e\u62bd\u6a23\u7684\u65b9\u5f0f\u751f\u6210\u6a39\uff0c\u6bcf\u68f5\u6a39\u5f7c\u6b64\u7368\u7acb Boosting \u900f\u904e\u5e8f\u5217\u7684\u65b9\u5f0f\u751f\u6210\u6a39\uff0c\u5f8c\u9762\u751f\u6210\u7684\u6a39\u6703\u8207\u524d\u4e00\u68f5\u6a39\u76f8\u95dc Boosting vs. 
Decision Tree \u6211\u5011\u518d\u8207\u6700\u4e00\u958b\u59cb\u6240\u63d0\u7684\u6c7a\u7b56\u6a39\u505a\u6bd4\u8f03\u3002\u6c7a\u7b56\u6a39\u901a\u5e38\u70ba\u4e00\u68f5\u8907\u96dc\u7684\u6a39\uff0c\u800c\u5728 Boosting \u662f\u7522\u751f\u975e\u5e38\u591a\u68f5\u7684\u6a39\uff0c\u4f46\u662f\u6bcf\u4e00\u68f5\u7684\u6a39\u90fd\u5f88\u7c21\u55ae\u7684\u6c7a\u7b56\u6a39\u3002Boosting \u5e0c\u671b\u65b0\u7684\u6a39\u53ef\u4ee5\u91dd\u5c0d\u820a\u7684\u6a39\u9810\u6e2c\u4e0d\u592a\u597d\u7684\u90e8\u5206\u505a\u4e00\u4e9b\u88dc\u5f37\u3002\u6700\u7d42\u6211\u5011\u8981\u628a\u6240\u6709\u7c21\u55ae\u7684\u6a39\u5408\u518d\u4e00\u8d77\u624d\u80fd\u7576\u6700\u5f8c\u7684\u9810\u6e2c\u8f38\u51fa\u3002 Boosting \u65b9\u6cd5\u6709\u54ea\u4e9b AdaBoost \u662f\u7531 Yoav Freund \u548c Robert Schapire \u65bc 1995 \u5e74\u63d0\u51fa\u3002\u6240\u8b02\u7684\u81ea\u9069\u61c9\u662f\u8868\u793a\u6839\u64da\u5f31\u5b78\u7fd2\u7684\u5b78\u7fd2\u8aa4\u5dee\u7387\u8868\u73fe\u4f86\u66f4\u65b0\u8a13\u7df4\u6a23\u672c\u7684\u6b0a\u91cd\uff0c\u7136\u5f8c\u57fa\u65bc\u8abf\u6574\u6b0a\u91cd\u5f8c\u7684\u8a13\u7df4\u96c6\u4f86\u8a13\u7df4\u7b2c\u4e8c\u500b\u5f31\u5b78\u7fd2\u5668\uff0c\u85c9\u7531\u6b64\u65b9\u6cd5\u4e0d\u65b7\u7684\u8fed\u4ee3\u4e0b\u53bb\u3002 AdaBoost\uff08Adaptive Boosting) AdaBoostClassifier AdaBoostRegressor Gradient Boosting \u7531 Friedman \u65bc 1999 \u5e74\u63d0\u51fa\u3002\u5176\u4e2d GBDT (Gradient Boosting Decision Tree) \u7684\u5f31\u5b78\u7fd2\u5668\u50c5\u9650\u65bc\u53ea\u80fd\u4f7f\u7528 CART \u6c7a\u7b56\u6a39\u6a21\u578b\uff0c\u4e26\u63a1\u7528\u52a0\u6cd5\u6a21\u578b\u7684\u524d\u5411\u5206\u6b65\u7b97\u6cd5\u4f86\u89e3\u6c7a\u5206\u985e\u548c\u8ff4\u6b78\u554f\u984c\u3002 Gradient Boosting GradientBoostingClassifier GradientBoostingRegressor \u63a5\u4e0b\u4f86\u4ecb\u7d39\u4e09\u500b\u8fd1\u5e74\u4e09\u500b\u5f37\u5927\u7684\u958b\u6e90\u6a5f\u5668\u5b78\u7fd2\u5c08\u6848\u3002\u9996\u5148 XGBoost \u6700\u521d\u662f\u7531\u9673\u5929\u5947\u65bc 
2014 \u5e74 3 \u6708\u767c\u8d77\u7684\u4e00\u500b\u7814\u7a76\u9805\u76ee\uff0c\u4e26\u5728\u77ed\u6642\u9593\u5167\u6210\u70ba\u7af6\u8cfd\u4e2d\u7684\u71b1\u9580\u7684\u6a21\u578b\u3002\u63a5\u8457\u65bc 2017 \u5e74 1 \u6708\u5fae\u8edf\u767c\u5e03\u4e86\u7b2c\u4e00\u500b\u7a69\u5b9a\u7684 LightGBM \u7248\u672c\u3002\u5b83\u662f\u4e00\u500b\u57fa\u65bc Gradient Boosting \u7684\u8f15\u91cf\u7d1a\u7684\u6f14\u7b97\u6cd5\uff0c\u512a\u9ede\u5728\u65bc\u4f7f\u7528\u5c11\u91cf\u8cc7\u6e90\u3001\u66f4\u5feb\u7684\u8a13\u7df4\u6548\u7387\u5f97\u5230\u66f4\u597d\u7684\u6e96\u78ba\u5ea6\u3002\u53e6\u5916\u5728\u540c\u5e74\u7684 4 \u6708\uff0c\u4fc4\u7f85\u65af\u7684\u4e00\u5bb6\u79d1\u6280\u516c\u53f8 Yandex \u767c\u5e03\u4e86 CatBoost \uff0c\u5176\u6838\u5fc3\u4f9d\u7136\u4f7f\u7528\u4e86 Gradient Boosting \u6280\u5de7\uff0c\u4e26\u70ba\u985e\u5225\u578b\u7684\u7279\u5fb5\u505a\u7279\u5225\u7684\u8f49\u63db\u4e26\u7522\u751f\u65b0\u7684\u6578\u503c\u578b\u7279\u5fb5\u3002 \u672a\u4f86\u5e7e\u5929\u5c07\u6703\u4ecb\u7d39 LightGBM \u8207 CatBoost \u54e6\uff01 [\u7a0b\u5f0f\u5be6\u4f5c] XGBoost \u5206\u985e\u5668 Parameters: - n_estimators: \u7e3d\u5171\u8fed\u4ee3\u7684\u6b21\u6578\uff0c\u5373\u6c7a\u7b56\u6a39\u7684\u500b\u6578\u3002\u9810\u8a2d\u503c\u70ba100\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\uff0c\u9ed8\u8a8d\u503c\u70ba6\u3002 - booster: gbtree \u6a39\u6a21\u578b(\u9810\u8a2d) / gbliner \u7dda\u6027\u6a21\u578b - learning_rate: \u5b78\u7fd2\u901f\u7387\uff0c\u9810\u8a2d0.3\u3002 - gamma: \u61f2\u7f70\u9805\u4fc2\u6578\uff0c\u6307\u5b9a\u7bc0\u9ede\u5206\u88c2\u6240\u9700\u7684\u6700\u5c0f\u640d\u5931\u51fd\u6578\u4e0b\u964d\u503c\u3002 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 
- predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 from xgboost import XGBClassifier # \u5efa\u7acb XGBClassifier \u6a21\u578b xgboostModel = XGBClassifier ( n_estimators = 100 , learning_rate = 0.3 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b xgboostModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = xgboostModel . predict ( X_train ) \u4f7f\u7528Score\u8a55\u4f30\u6a21\u578b \u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb score() \u76f4\u63a5\u8a08\u7b97\u6a21\u578b\u9810\u6e2c\u7684\u6e96\u78ba\u7387\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( '\u8a13\u7df4\u96c6: ' , xgboostModel . score ( X_train , y_train )) print ( '\u6e2c\u8a66\u96c6: ' , xgboostModel . score ( X_test , y_test )) \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6: 1.0 \u6e2c\u8a66\u96c6: 0.9333333333333333 \u5927\u5bb6\u53ef\u4ee5\u8a66\u8457\u8207\u524d\u5e7e\u5929\u7684\u6c7a\u7b56\u6a39\u548c\u96a8\u6a5f\u68ee\u6797\u5169\u500b\u6a21\u578b\u76f8\u6bd4\u8f03\u3002\u662f\u4e0d\u662f XGBoost \u6709\u8457\u66f4\u597d\u7684\u9810\u6e2c\u7d50\u679c\u5462\uff1f\u56e0\u70ba\u6709\u4e86 Gradient Boosting \u5b78\u7fd2\u6a5f\u5236\uff0c\u5927\u5e45\u63d0\u5347\u4e86\u9810\u6e2c\u80fd\u529b\u3002\u5728\u5b78\u7fd2\u904e\u7a0b\u4e2d\u5c07\u9810\u6e2c\u4e0d\u597d\u7684\u5730\u65b9\uff0c\u5c24\u5176\u662f\u6a58\u8272 (Versicolour) \u8207\u7da0\u8272 (Virginica) \u4ea4\u754c\u8655\u6709\u66f4\u597d\u7684\u8a55\u4f30\u80fd\u529b\u3002 XGBoost (\u8ff4\u6b78\u5668) Parameters: - n_estimators: \u7e3d\u5171\u8fed\u4ee3\u7684\u6b21\u6578\uff0c\u5373\u6c7a\u7b56\u6a39\u7684\u500b\u6578\u3002\u9810\u8a2d\u503c\u70ba100\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\uff0c\u9ed8\u8a8d\u503c\u70ba6\u3002 - booster: gbtree \u6a39\u6a21\u578b(\u9810\u8a2d) / gbliner \u7dda\u6027\u6a21\u578b - learning_rate: \u5b78\u7fd2\u901f\u7387\uff0c\u9810\u8a2d0.3\u3002 - gamma: 
\u61f2\u7f70\u9805\u4fc2\u6578\uff0c\u6307\u5b9a\u7bc0\u9ede\u5206\u88c2\u6240\u9700\u7684\u6700\u5c0f\u640d\u5931\u51fd\u6578\u4e0b\u964d\u503c\u3002 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 import xgboost as xgb # \u5efa\u7acb XGBRegressor \u6a21\u578b xgbrModel = xgb . XGBRegressor () # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b xgbrModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = xgbrModel . predict ( x ) Reference XGboost\u5165\u9580\u7d93\u9a57\u5206\u4eab-\u8d85\u53c3\u6578\u89e3\u6790 \u95dc\u65bc XGBoost 20 \u500b FAQ \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 15] \u6a5f\u5668\u5b78\u7fd2\u5e38\u52dd\u8ecd - XGBoost"},{"location":"15.XGBoost/#day-15-xgboost","text":"","title":"[Day 15] \u6a5f\u5668\u5b78\u7fd2\u5e38\u52dd\u8ecd - XGBoost"},{"location":"15.XGBoost/#_1","text":"XGBoost \u4ecb\u7d39 XGBoost \u662f\u4ec0\u9ebc\uff1f\u70ba\u4ec0\u9ebc\u5b83\u90a3\u9ebc\u5f37\u5927\uff1f XGBoost \u512a\u9ede \u6bd4\u8f03\u5169\u7a2e\u6574\u9ad4\u5b78\u7fd2\u67b6\u69cb\u5dee\u7570\uff1f Bagging vs. Boosting Boosting vs. 
Decision Tree Boosting \u65b9\u6cd5\u6709\u54ea\u4e9b \u5be6\u4f5c XGBoost \u5206\u985e\u5668\u8207\u8ff4\u6b78\u5668 \u6bd4\u8f03 Bagging \u8207 Boosting \u5169\u8005\u5dee\u5225 \u7bc4\u4f8b\u7a0b\u5f0f XGBoost(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f XGBoost(Regression)\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"15.XGBoost/#xgboost","text":"XGboost \u5168\u540d\u70ba eXtreme Gradient Boosting\uff0c\u662f\u76ee\u524d Kaggle \u7af6\u8cfd\u4e2d\u6700\u5e38\u898b\u5230\u7684\u7b97\u6cd5\uff0c\u540c\u6642\u4e5f\u662f\u591a\u6578\u5f97\u734e\u8005\u6240\u4f7f\u7528\u7684\u6a21\u578b\u3002\u6b64\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u662f\u7531\u83ef\u76db\u9813\u5927\u5b78\u535a\u58eb\u751f\u9673\u5929\u5947\u6240\u63d0\u51fa\u4f86\u7684\uff0c\u5b83\u662f\u4ee5 Gradient Boosting \u70ba\u57fa\u790e\u4e0b\u53bb\u5be6\u4f5c\uff0c\u4e26\u6dfb\u52a0\u4e00\u4e9b\u65b0\u7684\u6280\u5de7\u3002\u5b83\u53ef\u4ee5\u8aaa\u662f\u7d50\u5408 Bagging \u548c Boosting \u7684\u512a\u9ede\u3002XGboost \u4fdd\u6709 Gradient Boosting \u7684\u505a\u6cd5\uff0c\u6bcf\u4e00\u68f5\u6a39\u662f\u4e92\u76f8\u95dc\u806f\u7684\uff0c\u76ee\u6a19\u662f\u5e0c\u671b\u5f8c\u9762\u751f\u6210\u7684\u6a39\u80fd\u5920\u4fee\u6b63\u524d\u9762\u4e00\u68f5\u6a39\u72af\u932f\u7684\u5730\u65b9\u3002\u6b64\u5916 XGboost \u662f\u63a1\u7528\u7279\u5fb5\u96a8\u6a5f\u63a1\u6a23\u7684\u6280\u5de7\uff0c\u548c\u96a8\u6a5f\u68ee\u6797\u4e00\u6a23\u5728\u751f\u6210\u6bcf\u4e00\u68f5\u6a39\u7684\u6642\u5019\u96a8\u6a5f\u62bd\u53d6\u7279\u5fb5\uff0c\u56e0\u6b64\u5728\u6bcf\u68f5\u6a39\u7684\u751f\u6210\u4e2d\u4e26\u4e0d\u6703\u6bcf\u4e00\u6b21\u90fd\u62ff\u5168\u90e8\u7684\u7279\u5fb5\u53c3\u8207\u6c7a\u7b56\u3002\u6b64\u5916\u70ba\u4e86\u8b93\u6a21\u578b\u904e\u65bc\u8907\u96dc\uff0cXGboost 
\u5728\u76ee\u6a19\u51fd\u6578\u6dfb\u52a0\u4e86\u6a19\u6e96\u5316\u3002\u56e0\u70ba\u6a21\u578b\u5728\u8a13\u7df4\u6642\u70ba\u4e86\u64ec\u5408\u8a13\u7df4\u8cc7\u6599\uff0c\u6703\u7522\u751f\u5f88\u591a\u9ad8\u6b21\u9805\u7684\u51fd\u6578\uff0c\u4f46\u53cd\u800c\u5bb9\u6613\u88ab\u96dc\u8a0a\u5e72\u64fe\u5c0e\u81f4\u904e\u5ea6\u64ec\u5408\u3002\u56e0\u6b64 L1/L2 Regularization \u76ee\u7684\u662f\u8b93\u640d\u5931\u51fd\u6578\u66f4\u4f73\u5e73\u6ed1\uff0c\u4e14\u6297\u96dc\u8a0a\u5e72\u64fe\u80fd\u529b\u66f4\u5927\u3002\u6700\u5f8c XGboost \u9084\u7528\u5230\u4e86\u4e00\u968e\u5c0e\u6578\u548c\u4e8c\u968e\u5c0e\u6578\u4f86\u751f\u6210\u4e0b\u4e00\u68f5\u6a39\u3002\u5176\u4e2d Gradient \u5c31\u662f\u6240\u8b02\u7684\u4e00\u968e\u5c0e\u6578\uff0c\u800c Hessian \u5373\u70ba\u4e8c\u968e\u5c0e\u6578\u3002","title":"\u4eba\u4eba\u9a5a\u5947\u7684 XGBoost"},{"location":"15.XGBoost/#xgboost_1","text":"XGBoost \u9664\u4e86\u53ef\u4ee5\u505a\u5206\u985e\u4e5f\u80fd\u9032\u884c\u8ff4\u6b78\u9023\u7e8c\u6027\u6578\u503c\u7684\u9810\u6e2c\uff0c\u800c\u4e14\u6548\u679c\u901a\u5e38\u90fd\u4e0d\u5dee\u3002\u4e26\u900f\u904e Boosting \u6280\u5de7\u5c07\u8a31\u591a\u5f31\u6c7a\u7b56\u6a39\u96c6\u6210\u5728\u4e00\u8d77\u5f62\u6210\u4e00\u500b\u5f37\u7684\u9810\u6e2c\u6a21\u578b\u3002 \u5229\u7528\u4e86\u4e8c\u968e\u68af\u5ea6\u4f86\u5c0d\u7bc0\u9ede\u9032\u884c\u5283\u5206 \u5229\u7528\u5c40\u90e8\u8fd1\u4f3c\u7b97\u6cd5\u5c0d\u5206\u88c2\u7bc0\u9ede\u9032\u884c\u512a\u5316 \u5728\u640d\u5931\u51fd\u6578\u4e2d\u52a0\u5165\u4e86 L1/L2 \u9805\uff0c\u63a7\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6 \u63d0\u4f9b GPU \u5e73\u884c\u5316\u904b\u7b97","title":"XGBoost \u512a\u9ede"},{"location":"15.XGBoost/#bagging-vs-boosting","text":"\u5728\u9019\u88e1\u5e6b\u5927\u5bb6\u56de\u9867\u4e00\u4e0b\u6574\u9ad4\u5b78\u7fd2\u4e2d\u7684 Bagging \u8207 Boosting \u5169\u8005\u9593\u7684\u5dee\u7570\u3002\u9996\u5148 Bagging 
\u900f\u904e\u96a8\u6a5f\u62bd\u6a23\u7684\u65b9\u5f0f\u751f\u6210\u6bcf\u4e00\u68f5\u6a39\uff0c\u6700\u91cd\u8981\u7684\u662f\u6bcf\u68f5\u6a39\u5f7c\u6b64\u7368\u7acb\u4e26\u7121\u95dc\u806f\u3002\u5148\u524d\u6240\u63d0\u5230\u7684\u96a8\u6a5f\u68ee\u6797\u5c31\u662f Bagging \u7684\u5be6\u4f8b\u3002\u53e6\u5916 Boosting \u5247\u662f\u900f\u904e\u5e8f\u5217\u7684\u65b9\u5f0f\u751f\u6210\u6a39\uff0c\u5f8c\u9762\u6240\u751f\u6210\u7684\u6a39\u6703\u8207\u524d\u4e00\u68f5\u6a39\u76f8\u95dc\u3002\u672c\u7ae0\u6240\u63d0\u53ca\u7684 XGBoost \u5c31\u662f Boosting \u65b9\u6cd5\u7684\u5176\u4e2d\u4e00\u7a2e\u5be6\u4f8b\u3002\u6b63\u662f\u6bcf\u68f5\u6a39\u7684\u751f\u6210\u90fd\u6539\u5584\u4e86\u4e0a\u4e00\u68f5\u6a39\u5b78\u7fd2\u4e0d\u597d\u7684\u5730\u65b9\uff0c\u56e0\u6b64 Boosting \u7684\u6a21\u578b\u901a\u5e38\u6703\u6bd4 Bagging \u9084\u4f86\u7684\u7cbe\u6e96\u3002 Bagging \u900f\u904e\u62bd\u6a23\u7684\u65b9\u5f0f\u751f\u6210\u6a39\uff0c\u6bcf\u68f5\u6a39\u5f7c\u6b64\u7368\u7acb Boosting \u900f\u904e\u5e8f\u5217\u7684\u65b9\u5f0f\u751f\u6210\u6a39\uff0c\u5f8c\u9762\u751f\u6210\u7684\u6a39\u6703\u8207\u524d\u4e00\u68f5\u6a39\u76f8\u95dc","title":"Bagging vs. Boosting"},{"location":"15.XGBoost/#boosting-vs-decision-tree","text":"\u6211\u5011\u518d\u8207\u6700\u4e00\u958b\u59cb\u6240\u63d0\u7684\u6c7a\u7b56\u6a39\u505a\u6bd4\u8f03\u3002\u6c7a\u7b56\u6a39\u901a\u5e38\u70ba\u4e00\u68f5\u8907\u96dc\u7684\u6a39\uff0c\u800c\u5728 Boosting \u662f\u7522\u751f\u975e\u5e38\u591a\u68f5\u7684\u6a39\uff0c\u4f46\u662f\u6bcf\u4e00\u68f5\u7684\u6a39\u90fd\u5f88\u7c21\u55ae\u7684\u6c7a\u7b56\u6a39\u3002Boosting \u5e0c\u671b\u65b0\u7684\u6a39\u53ef\u4ee5\u91dd\u5c0d\u820a\u7684\u6a39\u9810\u6e2c\u4e0d\u592a\u597d\u7684\u90e8\u5206\u505a\u4e00\u4e9b\u88dc\u5f37\u3002\u6700\u7d42\u6211\u5011\u8981\u628a\u6240\u6709\u7c21\u55ae\u7684\u6a39\u5408\u518d\u4e00\u8d77\u624d\u80fd\u7576\u6700\u5f8c\u7684\u9810\u6e2c\u8f38\u51fa\u3002","title":"Boosting vs. 
Decision Tree"},{"location":"15.XGBoost/#boosting","text":"AdaBoost \u662f\u7531 Yoav Freund \u548c Robert Schapire \u65bc 1995 \u5e74\u63d0\u51fa\u3002\u6240\u8b02\u7684\u81ea\u9069\u61c9\u662f\u8868\u793a\u6839\u64da\u5f31\u5b78\u7fd2\u7684\u5b78\u7fd2\u8aa4\u5dee\u7387\u8868\u73fe\u4f86\u66f4\u65b0\u8a13\u7df4\u6a23\u672c\u7684\u6b0a\u91cd\uff0c\u7136\u5f8c\u57fa\u65bc\u8abf\u6574\u6b0a\u91cd\u5f8c\u7684\u8a13\u7df4\u96c6\u4f86\u8a13\u7df4\u7b2c\u4e8c\u500b\u5f31\u5b78\u7fd2\u5668\uff0c\u85c9\u7531\u6b64\u65b9\u6cd5\u4e0d\u65b7\u7684\u8fed\u4ee3\u4e0b\u53bb\u3002 AdaBoost\uff08Adaptive Boosting) AdaBoostClassifier AdaBoostRegressor Gradient Boosting \u7531 Friedman \u65bc 1999 \u5e74\u63d0\u51fa\u3002\u5176\u4e2d GBDT (Gradient Boosting Decision Tree) \u7684\u5f31\u5b78\u7fd2\u5668\u50c5\u9650\u65bc\u53ea\u80fd\u4f7f\u7528 CART \u6c7a\u7b56\u6a39\u6a21\u578b\uff0c\u4e26\u63a1\u7528\u52a0\u6cd5\u6a21\u578b\u7684\u524d\u5411\u5206\u6b65\u7b97\u6cd5\u4f86\u89e3\u6c7a\u5206\u985e\u548c\u8ff4\u6b78\u554f\u984c\u3002 Gradient Boosting GradientBoostingClassifier GradientBoostingRegressor \u63a5\u4e0b\u4f86\u4ecb\u7d39\u4e09\u500b\u8fd1\u5e74\u4e09\u500b\u5f37\u5927\u7684\u958b\u6e90\u6a5f\u5668\u5b78\u7fd2\u5c08\u6848\u3002\u9996\u5148 XGBoost \u6700\u521d\u662f\u7531\u9673\u5929\u5947\u65bc 2014 \u5e74 3 \u6708\u767c\u8d77\u7684\u4e00\u500b\u7814\u7a76\u9805\u76ee\uff0c\u4e26\u5728\u77ed\u6642\u9593\u5167\u6210\u70ba\u7af6\u8cfd\u4e2d\u7684\u71b1\u9580\u7684\u6a21\u578b\u3002\u63a5\u8457\u65bc 2017 \u5e74 1 \u6708\u5fae\u8edf\u767c\u5e03\u4e86\u7b2c\u4e00\u500b\u7a69\u5b9a\u7684 LightGBM \u7248\u672c\u3002\u5b83\u662f\u4e00\u500b\u57fa\u65bc Gradient Boosting \u7684\u8f15\u91cf\u7d1a\u7684\u6f14\u7b97\u6cd5\uff0c\u512a\u9ede\u5728\u65bc\u4f7f\u7528\u5c11\u91cf\u8cc7\u6e90\u3001\u66f4\u5feb\u7684\u8a13\u7df4\u6548\u7387\u5f97\u5230\u66f4\u597d\u7684\u6e96\u78ba\u5ea6\u3002\u53e6\u5916\u5728\u540c\u5e74\u7684 4 
\u6708\uff0c\u4fc4\u7f85\u65af\u7684\u4e00\u5bb6\u79d1\u6280\u516c\u53f8 Yandex \u767c\u5e03\u4e86 CatBoost \uff0c\u5176\u6838\u5fc3\u4f9d\u7136\u4f7f\u7528\u4e86 Gradient Boosting \u6280\u5de7\uff0c\u4e26\u70ba\u985e\u5225\u578b\u7684\u7279\u5fb5\u505a\u7279\u5225\u7684\u8f49\u63db\u4e26\u7522\u751f\u65b0\u7684\u6578\u503c\u578b\u7279\u5fb5\u3002 \u672a\u4f86\u5e7e\u5929\u5c07\u6703\u4ecb\u7d39 LightGBM \u8207 CatBoost \u54e6\uff01","title":"Boosting \u65b9\u6cd5\u6709\u54ea\u4e9b"},{"location":"15.XGBoost/#_2","text":"","title":"[\u7a0b\u5f0f\u5be6\u4f5c]"},{"location":"15.XGBoost/#xgboost_2","text":"Parameters: - n_estimators: \u7e3d\u5171\u8fed\u4ee3\u7684\u6b21\u6578\uff0c\u5373\u6c7a\u7b56\u6a39\u7684\u500b\u6578\u3002\u9810\u8a2d\u503c\u70ba100\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\uff0c\u9ed8\u8a8d\u503c\u70ba6\u3002 - booster: gbtree \u6a39\u6a21\u578b(\u9810\u8a2d) / gbliner \u7dda\u6027\u6a21\u578b - learning_rate: \u5b78\u7fd2\u901f\u7387\uff0c\u9810\u8a2d0.3\u3002 - gamma: \u61f2\u7f70\u9805\u4fc2\u6578\uff0c\u6307\u5b9a\u7bc0\u9ede\u5206\u88c2\u6240\u9700\u7684\u6700\u5c0f\u640d\u5931\u51fd\u6578\u4e0b\u964d\u503c\u3002 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 from xgboost import XGBClassifier # \u5efa\u7acb XGBClassifier \u6a21\u578b xgboostModel = XGBClassifier ( n_estimators = 100 , learning_rate = 0.3 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b xgboostModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = xgboostModel . 
predict ( X_train )","title":"XGBoost \u5206\u985e\u5668"},{"location":"15.XGBoost/#score","text":"\u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb score() \u76f4\u63a5\u8a08\u7b97\u6a21\u578b\u9810\u6e2c\u7684\u6e96\u78ba\u7387\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( '\u8a13\u7df4\u96c6: ' , xgboostModel . score ( X_train , y_train )) print ( '\u6e2c\u8a66\u96c6: ' , xgboostModel . score ( X_test , y_test )) \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6: 1.0 \u6e2c\u8a66\u96c6: 0.9333333333333333 \u5927\u5bb6\u53ef\u4ee5\u8a66\u8457\u8207\u524d\u5e7e\u5929\u7684\u6c7a\u7b56\u6a39\u548c\u96a8\u6a5f\u68ee\u6797\u5169\u500b\u6a21\u578b\u76f8\u6bd4\u8f03\u3002\u662f\u4e0d\u662f XGBoost \u6709\u8457\u66f4\u597d\u7684\u9810\u6e2c\u7d50\u679c\u5462\uff1f\u56e0\u70ba\u6709\u4e86 Gradient Boosting \u5b78\u7fd2\u6a5f\u5236\uff0c\u5927\u5e45\u63d0\u5347\u4e86\u9810\u6e2c\u80fd\u529b\u3002\u5728\u5b78\u7fd2\u904e\u7a0b\u4e2d\u5c07\u9810\u6e2c\u4e0d\u597d\u7684\u5730\u65b9\uff0c\u5c24\u5176\u662f\u6a58\u8272 (Versicolour) \u8207\u7da0\u8272 (Virginica) \u4ea4\u754c\u8655\u6709\u66f4\u597d\u7684\u8a55\u4f30\u80fd\u529b\u3002","title":"\u4f7f\u7528Score\u8a55\u4f30\u6a21\u578b"},{"location":"15.XGBoost/#xgboost_3","text":"Parameters: - n_estimators: \u7e3d\u5171\u8fed\u4ee3\u7684\u6b21\u6578\uff0c\u5373\u6c7a\u7b56\u6a39\u7684\u500b\u6578\u3002\u9810\u8a2d\u503c\u70ba100\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\uff0c\u9ed8\u8a8d\u503c\u70ba6\u3002 - booster: gbtree \u6a39\u6a21\u578b(\u9810\u8a2d) / gbliner \u7dda\u6027\u6a21\u578b - learning_rate: \u5b78\u7fd2\u901f\u7387\uff0c\u9810\u8a2d0.3\u3002 - gamma: \u61f2\u7f70\u9805\u4fc2\u6578\uff0c\u6307\u5b9a\u7bc0\u9ede\u5206\u88c2\u6240\u9700\u7684\u6700\u5c0f\u640d\u5931\u51fd\u6578\u4e0b\u964d\u503c\u3002 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 
- predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 import xgboost as xgb # \u5efa\u7acb XGBRegressor \u6a21\u578b xgbrModel = xgb . XGBRegressor () # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b xgbrModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = xgbrModel . predict ( x )","title":"XGBoost (\u8ff4\u6b78\u5668)"},{"location":"15.XGBoost/#reference","text":"XGboost\u5165\u9580\u7d93\u9a57\u5206\u4eab-\u8d85\u53c3\u6578\u89e3\u6790 \u95dc\u65bc XGBoost 20 \u500b FAQ \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"16.Stacking/","text":"[Day 16] \u6bcf\u500b\u6a21\u578b\u6211\u5168\u90fd\u8981 - \u5806\u758a\u6cd5 (Stacking) \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u4e86\u89e3 Stacking \u65b9\u6cd5 \u5806\u758a\u6cd5\u7684\u5b78\u7fd2\u6a5f\u5236\u70ba\u4f55\uff1f \u5229\u7528 Stacking \u5be6\u4f5c\u8ff4\u6b78\u5668 \u900f\u904e Stacking Regressor \u5efa\u7acb\u623f\u50f9\u9810\u6e2c\u6a21\u578b \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 \u5806\u758a\u6cd5 (Stacking) \u662f\u6574\u9ad4\u5b78\u7fd2\u4e2d\u5176\u4e2d\u4e00\u7a2e\u5be6\u4f8b\u3002\u5b83\u662f\u7d50\u5408\u8a31\u591a\u7368\u7acb\u7684\u6a21\u578b\u6240\u9810\u6e2c\u51fa\u4f86\u7684\u7d50\u679c\uff0c\u4e26\u5c07\u6bcf\u500b\u7368\u7acb\u6a21\u578b\u7684\u8f38\u51fa\u8996\u70ba\u6700\u7d42\u6a21\u578b\u9810\u6e2c\u7684\u8f38\u5165\u7279\u5fb5\uff0c\u6700\u5f8c\u518d\u8a13\u7df4\u4e00\u500b\u6700\u7d42\u6a21\u578b\u3002\u4ee5\u4e0b\u5716\u70ba\u4f8b\uff0c\u5047\u8a2d\u6211\u5011\u4e8b\u5148\u8a13\u7df4\u4e09\u500b\u57fa\u5e95\u7684\u6a21\u578b (base 
learner)\uff0c\u9019\u4e09\u500b\u6a21\u578b\u5f7c\u6b64\u4e92\u76f8\u7121\u95dc\u9023\u3002\u7531\u65bc\u6bcf\u4e00\u500b\u6a21\u578b\u6240\u8a13\u7df4\u51fa\u4f86\u7684\u9810\u6e2c\u80fd\u529b\u90fd\u4e0d\u540c\uff0c\u4e5f\u8a31\u6a21\u578b\u4e00\u5728\u67d0\u500b\u5340\u6bb5\u7684\u8cc7\u6599\u6709\u4e0d\u592a\u597d\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u800c\u6a21\u578b\u4e8c\u80fd\u88dc\u8db3\u6a21\u578b\u4e00\u9810\u6e2c\u4e0d\u597d\u7684\u5730\u65b9\u3002\u85c9\u7531\u4e0a\u8ff0\u9019\u500b\u89c0\u9ede\u6211\u5011\u5c07\u4e09\u500b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u8f38\u51fa\u96c6\u5408\u8d77\u4f86(P1\u3001P2\u3001P3)\uff0c\u5982\u679c\u662f\u5206\u985e\u554f\u984c\u53ef\u4ee5\u900f\u904e\u6295\u7968\u65b9\u5f0f\uff0c\u800c\u8ff4\u6b78\u554f\u984c\u53ef\u4ee5\u63a1\u7528\u5e73\u5747\u6cd5\u6216\u662f\u52a0\u6b0a\u5e73\u5747\u6cd5\u5c07\u6240\u6709\u7684\u9810\u6e2c\u505a\u6700\u5f8c\u8a55\u4f30\u3002\u53c8\u6216\u8005\u662f\u53ef\u4ee5\u5c07\u9019\u4e09\u500b\u8f38\u51fa\u503c\u7576\u4f5c\u662f\u65b0\u6a21\u578b\u7684\u7279\u5fb5\u518d\u4e1f\u5165\u4e00\u500b\u6a5f\u5668\u5b78\u578b\u6a21\u578b\u505a\u6700\u5f8c\u7684\u9810\u6e2c\u5f97\u5230\u6700\u7d42\u8f38\u51fa\u3002 [\u7a0b\u5f0f\u5be6\u4f5c] \u5728\u6b64\u7bc4\u4f8b\u4e2d\u6211\u5011\u900f\u904e Sklearn \u6240\u63d0\u4f9b\u7684\u6ce2\u58eb\u9813\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6\u9032\u884c Stacking \u65b9\u6cd5\u5efa\u6a21\u3002\u4e26\u89c0\u5bdf\u540c\u4e00\u7d44\u8cc7\u6599\u5728\u55ae\u4e00\u6a21\u578b\u4e0b\u9810\u6e2c\uff0c\u8207\u52a0\u5165 Stacking \u6a5f\u5236\u5f8c\u7684\u7d50\u679c\u6709\u7121\u6539\u5584\u3002 1) \u8f09\u5165\u8cc7\u6599\u96c6 \u9996\u5148\u6211\u5011\u5920\u904e Sklearn \u5957\u4ef6\u8b80\u5165\u6ce2\u58eb\u9813\u623f\u50f9\u8cc7\u6599\u96c6\uff0c\u4e26\u5c07\u8f38\u5165\u7279\u5fb5\u8207\u623f\u50f9\u5408\u4f75\u6210\u4e00\u500b DataFrame\u3002\u5728\u6b64\u8cc7\u6599\u96c6\u4e2d\u7e3d\u5171\u6709 13 
\u500b\u8f38\u5165\u7279\u5fb5\uff0c\u4ee5\u53ca\u4e00\u500b\u8f38\u51fa MEDV \u5373\u70ba\u623f\u50f9\u3002 # load boston_dataset boston_dataset = load_boston () boston = pd . DataFrame ( boston_dataset . data , columns = boston_dataset . feature_names ) boston [ 'MEDV' ] = boston_dataset . target boston 2) \u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6 \u5728\u6b64\u7bc4\u4f8b\u4e2d\u6211\u5011\u8457\u91cd\u65bc\u6bd4\u8f03\u6a21\u578b\u7684\u5dee\u7570\uff0c\u56e0\u6b64\u6c92\u6709\u6309\u7167\u6b63\u5e38\u7684\u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b\u8d70\u3002\u8cc7\u6599\u8996\u89ba\u5316\u4ee5\u53ca\u524d\u8655\u7406...\u7b49\u662f\u975e\u5e38\u91cd\u8981\u7684\u54e6\uff01\u5728\u6b64\u6b65\u9a5f\u6211\u5011\u5feb\u901f\u7269\u7684\u5c07\u4e7e\u6de8\u7684\u8cc7\u6599\u5207\u51fa\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\uff0c\u5176\u4e2d\u8a13\u7df4\u96c6 X_train \u8207 y_train \u662f\u5be6\u969b\u53c3\u8207\u884c\u8a13\u7df4\u7684\u8cc7\u6599\u3002\u800c X_test \u8207 y_test \u662f\u672a\u53c3\u8207\u8a13\u7df4\u7684\u8cc7\u6599\uff0c\u5b83\u662f\u88ab\u62ff\u4f86\u6e2c\u8a66\u8a55\u4f30\u6700\u7d42\u8a13\u7df4\u597d\u7684\u6a21\u578b\u3002 from sklearn.model_selection import train_test_split X = boston . drop ([ 'MEDV' ], axis = 1 ) . values y = boston [[ 'MEDV' ]] . values X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.1 , random_state = 42 ) print ( 'Training data shape:' , X_train . shape ) print ( 'Testing data shape:' , X_test . 
shape ) \u7531\u65bc Sklearn \u8cc7\u6599\u96c6\u63d0\u4f9b\u7684\u8cc7\u6599\u6a23\u672c\u6578\u6bd4\u8f03\u5c11\uff0c\u56e0\u6b64\u6e2c\u8a66\u96c6\u50c5\u5207\u51fa 0.1 \u7684\u8cc7\u6599\u3002 \u57f7\u884c\u7d50\u679c\uff1a Training data shape: (455, 13) Testing data shape: (51, 13) XGBoost \u6a21\u578b \u56e0\u70ba\u8981\u8207 Stacking \u505a\u4e00\u500b\u6bd4\u8f03\u3002\u56e0\u6b64\u9019\u88e1\u4f7f\u7528 XGBoost \u5148\u8a13\u7df4\u4e00\u500b\u6a21\u578b\uff0c\u4e26\u5c07\u7d50\u679c\u8207 Stacking \u505a\u6bd4\u8f03\u3002 from xgboost import XGBRegressor # \u5efa\u7acb XGBRegressor \u6a21\u578b xgboostModel = XGBRegressor () # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b xgboostModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = xgboostModel . predict ( X_train ) print ( \"\u8a13\u7df4\u96c6 Score: \" , xgboostModel . score ( X_train , y_train )) print ( \"\u6e2c\u8a66\u96c6 Score: \" , xgboostModel . score ( X_test , y_test )) \u5f9e\u9810\u6e2c\u7d50\u679c\u6211\u5011\u5148\u4f86\u67e5\u770b R2 score\uff0c\u4e00\u5207\u770b\u4f3c\u9084 ok\u3002\u4e0d\u904e\u9019\u88e1\u8981\u547c\u7c72\u5404\u4f4d\u8b80\u8005\u7d55\u4e0d\u8981\u770b R2 \u5206\u6578\u9ad8\u5c31\u9ad8\u8208\u5f97\u592a\u65e9\uff01 \u57f7\u884c\u7d50\u679c\uff1a \u8a13\u7df4\u96c6 Score: 0.9999920949016282 \u6e2c\u8a66\u96c6 Score: 0.9292786904177338 \u6211\u5011\u4f86\u770b\u4e00\u4e0b MSE \u5be6\u969b\u7b97\u4e00\u4e0b\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u8aa4\u5dee\u3002\u53ef\u4ee5\u767c\u73fe\u5f88\u660e\u986f\u7684\u904e\u5ea6\u64ec\u5408\u4e86\uff0c\u7c21\u55ae\u4f86\u8aaa\u5728\u8a13\u7df4\u96c6\u7684\u8cc7\u6599\u7b97\u51fa\u4f86\u7684 MSE \u5f88\u5c0f\uff0c\u4f46\u662f\u5728\u6e2c\u8a66\u96c6\u4e2d MSE \u9810\u6e2c\u80fd\u529b\u4e0d\u8db3\u9020\u6210\u8aa4\u5dee\u8b8a\u5927\u3002 from sklearn import metrics # \u8a13\u7df4\u96c6 MSE train_pred = xgboostModel . predict ( X_train ) mse = metrics . 
mean_squared_error ( y_train , train_pred ) print ( '\u8a13\u7df4\u96c6 MSE: ' , mse ) # \u6e2c\u8a66\u96c6 MSE test_pred = xgboostModel . predict ( X_test ) mse = metrics . mean_squared_error ( y_test , test_pred ) print ( '\u6e2c\u8a66\u96c6 MSE: ' , mse ) \u57f7\u884c\u7d50\u679c\uff1a \u8a13\u7df4\u96c6 MSE: 0.0006847746512112584 \u6e2c\u8a66\u96c6 MSE: 4.415429632025227 Stacking \u6a21\u578b Stacking \u7d50\u5408\u8a31\u591a\u5f31\u5b78\u7fd2\u5668\uff0c\u5c07\u6240\u6709\u7684\u5f31\u5b78\u7fd2\u5668\u7684\u8f38\u51fa\u7576\u4f5c\u65b0\u7684\u6a21\u578b\u7684\u8f38\u5165\u63a5\u8457\u9810\u6e2c\u6700\u7d42\u7d50\u679c\u3002\u5728\u6b64\u7bc4\u4f8b\u4e2d\u6211\u5011\u5efa\u7acb\u4e86\u56db\u7a2e\u8ff4\u6b78\u5668\uff0c\u5206\u5225\u6709\u96a8\u6a5f\u68ee\u6797\u3001\u652f\u6301\u5411\u91cf\u6a5f\u3001KNN \u8207\u6c7a\u7b56\u6a39\u3002\u6700\u7d42\u7684\u6a21\u578b\u6211\u5011\u63a1\u7528\u5169\u5c64\u96b1\u85cf\u5c64\u7684\u795e\u7d93\u7db2\u8def\u4f5c\u70ba\u6700\u5f8c\u7684\u623f\u50f9\u9810\u6e2c\u8a55\u4f30\u6a21\u578b\u3002 Parameters: - estimators: m \u500b\u5f31\u5b78\u7fd2\u5668\u3002 - final_estimator: \u96c6\u5408\u6240\u6709\u5f31\u5b78\u7fd2\u5668\u7684\u8f38\u51fa\uff0c\u8a13\u7df4\u4e00\u500b\u6700\u7d42\u9810\u6e2c\u6a21\u578b\u3002\u9810\u8a2d\u70baLogisticRegression\u3002 Attributes: - estimators_: \u67e5\u770b\u5f31\u5b78\u7fd2\u5668\u7d44\u5408\u3002 - final_estimator: \u67e5\u770b\u6700\u7d42\u6574\u5408\u8a13\u7df4\u6a21\u578b\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 from sklearn.ensemble import RandomForestRegressor from sklearn.linear_model import LinearRegression from sklearn import svm from sklearn.neighbors import KNeighborsRegressor from sklearn.tree import DecisionTreeRegressor 
from sklearn.ensemble import StackingRegressor from sklearn.neural_network import MLPRegressor estimators = [ ( 'rf' , RandomForestRegressor ( random_state = 42 )), ( 'svr' , svm . SVR ()), ( 'knn' , KNeighborsRegressor ()), ( 'dt' , DecisionTreeRegressor ( random_state = 42 )) ] clf = StackingRegressor ( estimators = estimators , final_estimator = MLPRegressor ( activation = \"relu\" , alpha = 0.1 , hidden_layer_sizes = ( 8 , 8 ), learning_rate = \"constant\" , max_iter = 2000 , random_state = 1000 ) ) clf . fit ( X_train , y_train ) print ( \"\u8a13\u7df4\u96c6 Score: \" , clf . score ( X_train , y_train )) print ( \"\u6e2c\u8a66\u96c6 Score: \" , clf . score ( X_test , y_test )) \u6211\u5011\u5148\u89c0\u5bdf\u8a13\u7df4\u5f8c\u7684 R2 score \u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u4e0a\u7684\u5206\u6578\u3002\u5f9e\u6578\u503c\u770b\u89c0\u5bdf\u53ef\u4ee5\u767c\u73fe\u900f\u904e\u5806\u758a\u6cd5\u5169\u8005\u9593\u7684\u5206\u6578\u5dee\u8ddd\u8b8a\u5c0f\u4e86\u3002 \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6 Score: 0.9608703782891547 \u6e2c\u8a66\u96c6 Score: 0.9371735287625855 from sklearn import metrics # \u8a13\u7df4\u96c6 MSE train_pred = clf . predict ( X_train ) mse = metrics . mean_squared_error ( y_train , train_pred ) print ( '\u8a13\u7df4\u96c6 MSE: ' , mse ) # \u6e2c\u8a66\u96c6 MSE test_pred = clf . predict ( X_test ) mse = metrics . 
mean_squared_error ( y_test , test_pred ) print ( '\u6e2c\u8a66\u96c6 MSE: ' , mse ) \u63a5\u8457\u6211\u5011\u4e00\u6a23\u8a08\u7b97 MSE \u5be6\u969b\u89c0\u5bdf\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u4e0a\u7684\u8aa4\u5dee\u3002\u5f9e\u8a08\u7b97\u89e3\u679c\u53ef\u4ee5\u770b\u5230\u5169\u8005\u7684\u8aa4\u5dee\u90fd\u662f\u5dee\u4e0d\u591a\u7684\u3002\u5f9e\u9019\u88e1\u6211\u5011\u5c31\u53ef\u4ee5\u5f88\u6e05\u695a\u7684\u77e5\u9053\u900f\u904e Stacking \u53ef\u4ee5\u907f\u514d\u6a21\u578b\u904e\u64ec\u5408\uff0c\u4e26\u4e14\u900f\u904e\u591a\u500b\u57fa\u5e95\u7684\u6a21\u578b\u8b93\u6700\u7d42\u9810\u6e2c\u7d50\u679c\u6709\u6bd4\u8f03\u5e73\u6ed1\u7684\u8f38\u51fa\u3002 \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6 MSE: 3.389581229598408 \u6e2c\u8a66\u96c6 MSE: 3.9225215768179433 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 16] \u6bcf\u500b\u6a21\u578b\u6211\u5168\u90fd\u8981 - \u5806\u758a\u6cd5 (Stacking)"},{"location":"16.Stacking/#day-16-stacking","text":"","title":"[Day 16] \u6bcf\u500b\u6a21\u578b\u6211\u5168\u90fd\u8981 - \u5806\u758a\u6cd5 (Stacking)"},{"location":"16.Stacking/#_1","text":"\u4e86\u89e3 Stacking \u65b9\u6cd5 \u5806\u758a\u6cd5\u7684\u5b78\u7fd2\u6a5f\u5236\u70ba\u4f55\uff1f \u5229\u7528 Stacking \u5be6\u4f5c\u8ff4\u6b78\u5668 \u900f\u904e Stacking Regressor \u5efa\u7acb\u623f\u50f9\u9810\u6e2c\u6a21\u578b \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"16.Stacking/#_2","text":"\u5806\u758a\u6cd5 (Stacking) 
\u662f\u6574\u9ad4\u5b78\u7fd2\u4e2d\u5176\u4e2d\u4e00\u7a2e\u5be6\u4f8b\u3002\u5b83\u662f\u7d50\u5408\u8a31\u591a\u7368\u7acb\u7684\u6a21\u578b\u6240\u9810\u6e2c\u51fa\u4f86\u7684\u7d50\u679c\uff0c\u4e26\u5c07\u6bcf\u500b\u7368\u7acb\u6a21\u578b\u7684\u8f38\u51fa\u8996\u70ba\u6700\u7d42\u6a21\u578b\u9810\u6e2c\u7684\u8f38\u5165\u7279\u5fb5\uff0c\u6700\u5f8c\u518d\u8a13\u7df4\u4e00\u500b\u6700\u7d42\u6a21\u578b\u3002\u4ee5\u4e0b\u5716\u70ba\u4f8b\uff0c\u5047\u8a2d\u6211\u5011\u4e8b\u5148\u8a13\u7df4\u4e09\u500b\u57fa\u5e95\u7684\u6a21\u578b (base learner)\uff0c\u9019\u4e09\u500b\u6a21\u578b\u5f7c\u6b64\u4e92\u76f8\u7121\u95dc\u9023\u3002\u7531\u65bc\u6bcf\u4e00\u500b\u6a21\u578b\u6240\u8a13\u7df4\u51fa\u4f86\u7684\u9810\u6e2c\u80fd\u529b\u90fd\u4e0d\u540c\uff0c\u4e5f\u8a31\u6a21\u578b\u4e00\u5728\u67d0\u500b\u5340\u6bb5\u7684\u8cc7\u6599\u6709\u4e0d\u592a\u597d\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u800c\u6a21\u578b\u4e8c\u80fd\u88dc\u8db3\u6a21\u578b\u4e00\u9810\u6e2c\u4e0d\u597d\u7684\u5730\u65b9\u3002\u85c9\u7531\u4e0a\u8ff0\u9019\u500b\u89c0\u9ede\u6211\u5011\u5c07\u4e09\u500b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u8f38\u51fa\u96c6\u5408\u8d77\u4f86(P1\u3001P2\u3001P3)\uff0c\u5982\u679c\u662f\u5206\u985e\u554f\u984c\u53ef\u4ee5\u900f\u904e\u6295\u7968\u65b9\u5f0f\uff0c\u800c\u8ff4\u6b78\u554f\u984c\u53ef\u4ee5\u63a1\u7528\u5e73\u5747\u6cd5\u6216\u662f\u52a0\u6b0a\u5e73\u5747\u6cd5\u5c07\u6240\u6709\u7684\u9810\u6e2c\u505a\u6700\u5f8c\u8a55\u4f30\u3002\u53c8\u6216\u8005\u662f\u53ef\u4ee5\u5c07\u9019\u4e09\u500b\u8f38\u51fa\u503c\u7576\u4f5c\u662f\u65b0\u6a21\u578b\u7684\u7279\u5fb5\u518d\u4e1f\u5165\u4e00\u500b\u6a5f\u5668\u5b78\u578b\u6a21\u578b\u505a\u6700\u5f8c\u7684\u9810\u6e2c\u5f97\u5230\u6700\u7d42\u8f38\u51fa\u3002","title":"\u524d\u8a00"},{"location":"16.Stacking/#_3","text":"\u5728\u6b64\u7bc4\u4f8b\u4e2d\u6211\u5011\u900f\u904e Sklearn \u6240\u63d0\u4f9b\u7684\u6ce2\u58eb\u9813\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6\u9032\u884c Stacking 
\u65b9\u6cd5\u5efa\u6a21\u3002\u4e26\u89c0\u5bdf\u540c\u4e00\u7d44\u8cc7\u6599\u5728\u55ae\u4e00\u6a21\u578b\u4e0b\u9810\u6e2c\uff0c\u8207\u52a0\u5165 Stacking \u6a5f\u5236\u5f8c\u7684\u7d50\u679c\u6709\u7121\u6539\u5584\u3002","title":"[\u7a0b\u5f0f\u5be6\u4f5c]"},{"location":"16.Stacking/#1","text":"\u9996\u5148\u6211\u5011\u5920\u904e Sklearn \u5957\u4ef6\u8b80\u5165\u6ce2\u58eb\u9813\u623f\u50f9\u8cc7\u6599\u96c6\uff0c\u4e26\u5c07\u8f38\u5165\u7279\u5fb5\u8207\u623f\u50f9\u5408\u4f75\u6210\u4e00\u500b DataFrame\u3002\u5728\u6b64\u8cc7\u6599\u96c6\u4e2d\u7e3d\u5171\u6709 13 \u500b\u8f38\u5165\u7279\u5fb5\uff0c\u4ee5\u53ca\u4e00\u500b\u8f38\u51fa MEDV \u5373\u70ba\u623f\u50f9\u3002 # load boston_dataset boston_dataset = load_boston () boston = pd . DataFrame ( boston_dataset . data , columns = boston_dataset . feature_names ) boston [ 'MEDV' ] = boston_dataset . target boston","title":"1) \u8f09\u5165\u8cc7\u6599\u96c6"},{"location":"16.Stacking/#2","text":"\u5728\u6b64\u7bc4\u4f8b\u4e2d\u6211\u5011\u8457\u91cd\u65bc\u6bd4\u8f03\u6a21\u578b\u7684\u5dee\u7570\uff0c\u56e0\u6b64\u6c92\u6709\u6309\u7167\u6b63\u5e38\u7684\u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b\u8d70\u3002\u8cc7\u6599\u8996\u89ba\u5316\u4ee5\u53ca\u524d\u8655\u7406...\u7b49\u662f\u975e\u5e38\u91cd\u8981\u7684\u54e6\uff01\u5728\u6b64\u6b65\u9a5f\u6211\u5011\u5feb\u901f\u7269\u7684\u5c07\u4e7e\u6de8\u7684\u8cc7\u6599\u5207\u51fa\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\uff0c\u5176\u4e2d\u8a13\u7df4\u96c6 X_train \u8207 y_train \u662f\u5be6\u969b\u53c3\u8207\u884c\u8a13\u7df4\u7684\u8cc7\u6599\u3002\u800c X_test \u8207 y_test \u662f\u672a\u53c3\u8207\u8a13\u7df4\u7684\u8cc7\u6599\uff0c\u5b83\u662f\u88ab\u62ff\u4f86\u6e2c\u8a66\u8a55\u4f30\u6700\u7d42\u8a13\u7df4\u597d\u7684\u6a21\u578b\u3002 from sklearn.model_selection import train_test_split X = boston . drop ([ 'MEDV' ], axis = 1 ) . values y = boston [[ 'MEDV' ]] . 
values X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.1 , random_state = 42 ) print ( 'Training data shape:' , X_train . shape ) print ( 'Testing data shape:' , X_test . shape ) \u7531\u65bc Sklearn \u8cc7\u6599\u96c6\u63d0\u4f9b\u7684\u8cc7\u6599\u6a23\u672c\u6578\u6bd4\u8f03\u5c11\uff0c\u56e0\u6b64\u6e2c\u8a66\u96c6\u50c5\u5207\u51fa 0.1 \u7684\u8cc7\u6599\u3002 \u57f7\u884c\u7d50\u679c\uff1a Training data shape: (455, 13) Testing data shape: (51, 13)","title":"2) \u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6"},{"location":"16.Stacking/#xgboost","text":"\u56e0\u70ba\u8981\u8207 Stacking \u505a\u4e00\u500b\u6bd4\u8f03\u3002\u56e0\u6b64\u9019\u88e1\u4f7f\u7528 XGBoost \u5148\u8a13\u7df4\u4e00\u500b\u6a21\u578b\uff0c\u4e26\u5c07\u7d50\u679c\u8207 Stacking \u505a\u6bd4\u8f03\u3002 from xgboost import XGBRegressor # \u5efa\u7acb XGBRegressor \u6a21\u578b xgboostModel = XGBRegressor () # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b xgboostModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = xgboostModel . predict ( X_train ) print ( \"\u8a13\u7df4\u96c6 Score: \" , xgboostModel . score ( X_train , y_train )) print ( \"\u6e2c\u8a66\u96c6 Score: \" , xgboostModel . 
score ( X_test , y_test )) \u5f9e\u9810\u6e2c\u7d50\u679c\u6211\u5011\u5148\u4f86\u67e5\u770b R2 score\uff0c\u4e00\u5207\u770b\u4f3c\u9084 ok\u3002\u4e0d\u904e\u9019\u88e1\u8981\u547c\u7c72\u5404\u4f4d\u8b80\u8005\u7d55\u4e0d\u8981\u770b R2 \u5206\u6578\u9ad8\u5c31\u9ad8\u8208\u5f97\u592a\u65e9\uff01 \u57f7\u884c\u7d50\u679c\uff1a \u8a13\u7df4\u96c6 Score: 0.9999920949016282 \u6e2c\u8a66\u96c6 Score: 0.9292786904177338 \u6211\u5011\u4f86\u770b\u4e00\u4e0b MSE \u5be6\u969b\u7b97\u4e00\u4e0b\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u8aa4\u5dee\u3002\u53ef\u4ee5\u767c\u73fe\u5f88\u660e\u986f\u7684\u904e\u5ea6\u64ec\u5408\u4e86\uff0c\u7c21\u55ae\u4f86\u8aaa\u5728\u8a13\u7df4\u96c6\u7684\u8cc7\u6599\u7b97\u51fa\u4f86\u7684 MSE \u5f88\u5c0f\uff0c\u4f46\u662f\u5728\u6e2c\u8a66\u96c6\u4e2d MSE \u9810\u6e2c\u80fd\u529b\u4e0d\u8db3\u9020\u6210\u8aa4\u5dee\u8b8a\u5927\u3002 from sklearn import metrics # \u8a13\u7df4\u96c6 MSE train_pred = xgboostModel . predict ( X_train ) mse = metrics . mean_squared_error ( y_train , train_pred ) print ( '\u8a13\u7df4\u96c6 MSE: ' , mse ) # \u6e2c\u8a66\u96c6 MSE test_pred = xgboostModel . predict ( X_test ) mse = metrics . 
mean_squared_error ( y_test , test_pred ) print ( '\u6e2c\u8a66\u96c6 MSE: ' , mse ) \u57f7\u884c\u7d50\u679c\uff1a \u8a13\u7df4\u96c6 MSE: 0.0006847746512112584 \u6e2c\u8a66\u96c6 MSE: 4.415429632025227","title":"XGBoost \u6a21\u578b"},{"location":"16.Stacking/#stacking","text":"Stacking \u7d50\u5408\u8a31\u591a\u5f31\u5b78\u7fd2\u5668\uff0c\u5c07\u6240\u6709\u7684\u5f31\u5b78\u7fd2\u5668\u7684\u8f38\u51fa\u7576\u4f5c\u65b0\u7684\u6a21\u578b\u7684\u8f38\u5165\u63a5\u8457\u9810\u6e2c\u6700\u7d42\u7d50\u679c\u3002\u5728\u6b64\u7bc4\u4f8b\u4e2d\u6211\u5011\u5efa\u7acb\u4e86\u56db\u7a2e\u8ff4\u6b78\u5668\uff0c\u5206\u5225\u6709\u96a8\u6a5f\u68ee\u6797\u3001\u652f\u6301\u5411\u91cf\u6a5f\u3001KNN \u8207\u6c7a\u7b56\u6a39\u3002\u6700\u7d42\u7684\u6a21\u578b\u6211\u5011\u63a1\u7528\u5169\u5c64\u96b1\u85cf\u5c64\u7684\u795e\u7d93\u7db2\u8def\u4f5c\u70ba\u6700\u5f8c\u7684\u623f\u50f9\u9810\u6e2c\u8a55\u4f30\u6a21\u578b\u3002 Parameters: - estimators: m \u500b\u5f31\u5b78\u7fd2\u5668\u3002 - final_estimator: \u96c6\u5408\u6240\u6709\u5f31\u5b78\u7fd2\u5668\u7684\u8f38\u51fa\uff0c\u8a13\u7df4\u4e00\u500b\u6700\u7d42\u9810\u6e2c\u6a21\u578b\u3002\u9810\u8a2d\u70baLogisticRegression\u3002 Attributes: - estimators_: \u67e5\u770b\u5f31\u5b78\u7fd2\u5668\u7d44\u5408\u3002 - final_estimator: \u67e5\u770b\u6700\u7d42\u6574\u5408\u8a13\u7df4\u6a21\u578b\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 from sklearn.ensemble import RandomForestRegressor from sklearn.linear_model import LinearRegression from sklearn import svm from sklearn.neighbors import KNeighborsRegressor from sklearn.tree import DecisionTreeRegressor from sklearn.ensemble import StackingRegressor from sklearn.neural_network import MLPRegressor estimators = [ ( 'rf' , 
RandomForestRegressor ( random_state = 42 )), ( 'svr' , svm . SVR ()), ( 'knn' , KNeighborsRegressor ()), ( 'dt' , DecisionTreeRegressor ( random_state = 42 )) ] clf = StackingRegressor ( estimators = estimators , final_estimator = MLPRegressor ( activation = \"relu\" , alpha = 0.1 , hidden_layer_sizes = ( 8 , 8 ), learning_rate = \"constant\" , max_iter = 2000 , random_state = 1000 ) ) clf . fit ( X_train , y_train ) print ( \"\u8a13\u7df4\u96c6 Score: \" , clf . score ( X_train , y_train )) print ( \"\u6e2c\u8a66\u96c6 Score: \" , clf . score ( X_test , y_test )) \u6211\u5011\u5148\u89c0\u5bdf\u8a13\u7df4\u5f8c\u7684 R2 score \u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u4e0a\u7684\u5206\u6578\u3002\u5f9e\u6578\u503c\u770b\u89c0\u5bdf\u53ef\u4ee5\u767c\u73fe\u900f\u904e\u5806\u758a\u6cd5\u5169\u8005\u9593\u7684\u5206\u6578\u5dee\u8ddd\u8b8a\u5c0f\u4e86\u3002 \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6 Score: 0.9608703782891547 \u6e2c\u8a66\u96c6 Score: 0.9371735287625855 from sklearn import metrics # \u8a13\u7df4\u96c6 MSE train_pred = clf . predict ( X_train ) mse = metrics . mean_squared_error ( y_train , train_pred ) print ( '\u8a13\u7df4\u96c6 MSE: ' , mse ) # \u6e2c\u8a66\u96c6 MSE test_pred = clf . predict ( X_test ) mse = metrics . 
mean_squared_error ( y_test , test_pred ) print ( '\u6e2c\u8a66\u96c6 MSE: ' , mse ) \u63a5\u8457\u6211\u5011\u4e00\u6a23\u8a08\u7b97 MSE \u5be6\u969b\u89c0\u5bdf\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u4e0a\u7684\u8aa4\u5dee\u3002\u5f9e\u8a08\u7b97\u89e3\u679c\u53ef\u4ee5\u770b\u5230\u5169\u8005\u7684\u8aa4\u5dee\u90fd\u662f\u5dee\u4e0d\u591a\u7684\u3002\u5f9e\u9019\u88e1\u6211\u5011\u5c31\u53ef\u4ee5\u5f88\u6e05\u695a\u7684\u77e5\u9053\u900f\u904e Stacking \u53ef\u4ee5\u907f\u514d\u6a21\u578b\u904e\u64ec\u5408\uff0c\u4e26\u4e14\u900f\u904e\u591a\u500b\u57fa\u5e95\u7684\u6a21\u578b\u8b93\u6700\u7d42\u9810\u6e2c\u7d50\u679c\u6709\u6bd4\u8f03\u5e73\u6ed1\u7684\u8f38\u51fa\u3002 \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6 MSE: 3.389581229598408 \u6e2c\u8a66\u96c6 MSE: 3.9225215768179433 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Stacking \u6a21\u578b"},{"location":"17.LightGBM/","text":"[Day 17] \u8f15\u91cf\u5316\u7684\u68af\u5ea6\u63d0\u5347\u6a5f - LightGBM \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 LightGBM \u8207 XGBoost \u6bd4\u8f03 \u4e86\u89e3 LightGBM \u512a\u9ede \u5be6\u4f5c LightGBM \u8655\u7406\u8cc7\u6599\u4e0d\u5e73\u8861\u8cc7\u6599 \u4fe1\u7528\u5361\u76dc\u5237\u5075\u6e2c (\u4e8c\u5143\u5206\u985e) \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 LightGBM \u662f\u5c6c\u65bc GBDT \u5bb6\u65cf\u4e2d\u6210\u54e1\u4e4b\u4e00\uff0c\u76f8\u8f03\u65bc\u5148\u524d\u4ecb\u7d39\u7684 XGBoost \u5169\u8005\u53ef\u4ee5\u62ff\u4f86\u505a\u6bd4\u8f03\u3002\u7c21\u55ae\u4f86\u8aaa\u5f9e LightGBM \u540d\u5b57\u4e0a\u89c0\u5bdf\uff0c\u6211\u5011\u53ef\u4ee5\u770b\u51fa\u5b83\u662f\u8f15\u91cf\u5316 (Light) \u7684\u68af\u5ea6\u63d0\u5347\u6a5f (GBM) \u7684\u5be6\u4f8b\u3002\u5176\u76f8\u5c0d XGBoost 
\u4f86\u8aaa\u5b83\u5177\u6709\u8a13\u7df4\u901f\u5ea6\u5feb\u3001\u8a18\u61b6\u9ad4\u4f54\u7528\u4f4e\u7684\u7279\u9ede\uff0c\u56e0\u6b64\u8fd1\u5e7e\u5e74 LightGBM \u5728 Kaggle \u4e0a\u4e5f\u7b97\u662f\u71b1\u9580\u6a21\u578b\u4e00\u3002 LightGBM \u8207 XGBoost \u6bd4\u8f03 \u9019\u5169\u7a2e\u6f14\u7b97\u6cd5\u90fd\u4f7f\u7528\u8caa\u5a6a\u7684\u65b9\u6cd5\u4f86\u6700\u5c0f\u5316\u640d\u5931\u51fd\u6578\u7684\u68af\u5ea6\u4f86\u69cb\u5efa\u6240\u6709\u7684\u5f31\u5b78\u7fd2\u5668\u3002\u5176 tree-based \u6f14\u7b97\u6cd5\u6240\u9762\u81e8\u7684\u6311\u6230\u662f\u5982\u4f55\u6311\u9078\u6700\u4f73\u7684\u8449\u7bc0\u9ede\u7684\u5207\u5272\u65b9\u5f0f\uff0c\u7136\u800c LightGBM \u548c XGBoost \u5206\u5225\u4f7f\u7528\u4e0d\u540c\u7684\u512a\u5316\u6280\u8853\u8207\u65b9\u6cd5\u4f86\u8b58\u5225\u6700\u4f73\u7684\u5206\u5272\u9ede\u3002 LightGBM \u512a\u9ede LightGBM \u7531\u5fae\u8edf\u5718\u968a\u65bc 2017 \u5e74\u6240\u767c\u8868\u7684\u8ad6\u6587 LightGBM: A Highly Efficient Gradient Boosting Decision Tree \u88ab\u63d0\u51fa\u3002\u5176\u4e3b\u8981\u60f3\u6cd5\u662f\u5229\u7528\u6c7a\u7b56\u6a39\u70ba\u57fa\u5e95\u7684\u5f31\u5b78\u7fd2\u5668\uff0c\u4e0d\u65b7\u5730\u8fed\u4ee3\u8a13\u7df4\u4e26\u53d6\u5f97\u6700\u4f73\u7684\u6a21\u578b\u3002\u540c\u6642\u8a72\u6f14\u7b97\u6cd5\u9032\u884c\u4e86\u512a\u5316\u4f7f\u5f97\u8a13\u7df4\u901f\u5ea6\u8b8a\u5feb\uff0c\u4e26\u4e14\u6709\u6548\u964d\u88ab\u6d88\u8017\u7684\u8cc7\u6e90\u3002LightGBM \u4e5f\u662f\u500b\u958b\u6e90\u5c08\u6848\u5927\u5bb6\u53ef\u4ee5\u5728 GitHub \u4e0a\u53ef\u4ee5\u53d6\u5f97\u76f8\u95dc\u8cc7\u8a0a\u3002 \u5728\u5b98\u65b9\u7684\u6587\u4ef6\u4e2d\u4e5f\u689d\u5217\u4e86\u5e7e\u500b LightGBM \u7684\u512a\u9ede\uff1a - \u66f4\u5feb\u7684\u8a13\u7df4\u901f\u5ea6\u548c\u66f4\u9ad8\u7684\u6548\u7387 - \u4f4e\u8a18\u61b6\u9ad4\u4f7f\u7528\u7387 - \u66f4\u597d\u7684\u6e96\u78ba\u5ea6 - \u652f\u63f4 GPU \u5e73\u884c\u904b\u7b97 - \u80fd\u5920\u8655\u7406\u5927\u898f\u6a21\u6578\u64da LightGBM 
\u4f7f\u7528 leaf-wise tree \u6f14\u7b97\u6cd5\uff0c\u56e0\u6b64\u5728\u8fed\u4ee3\u904e\u7a0b\u4e2d\u80fd\u66f4\u5feb\u5730\u6536\u6582\u3002\u4f46\u662f leaf-wise tree \u65b9\u6cd5\u8f03\u5bb9\u6613\u904e\u64ec\u5408\u3002\u8a73\u7d30\u7684\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u6587\u7ae0\u6700\u5f8c\u63d0\u4f9b\u7684\u76f8\u95dc\u8cc7\u6e90\u3002 \u8655\u7406 unbalance \u8cc7\u6599 \u5728\u4f7f\u7528 LightGBM \u505a\u5206\u985e\u5668\u6642\u8a72\u5982\u4f55\u8655\u7406\u6a23\u672c\u985e\u5225\u5206\u4f48\u4e0d\u5e73\u8861\u7684\u554f\u984c\uff1f\u4e00\u500b\u7c21\u55ae\u7684\u65b9\u6cd5\u662f\u8a2d\u5b9a is_unbalance=True \uff0c\u6216\u662f scale_pos_weight \u6ce8\u610f\u9019\u5169\u500b\u53c3\u6578\u53ea\u80fd\u64c7\u4e00\u4f7f\u7528\u3002\u4ee5\u4e0b\u6211\u5011\u5c31\u4f7f\u7528\u4e00\u500b\u4e0d\u5e73\u8861\u7684\u8cc7\u6599\u96c6\uff0c\u4fe1\u7528\u5361\u76dc\u5237\u9810\u6e2c\u4f86\u505a\u793a\u7bc4\u3002\u9996\u5148\u6211\u5011\u53ef\u4ee5\u8f09\u5165 Google \u6240\u63d0\u4f9b\u7684\u4fe1\u7528\u5361\u76dc\u5237\u8cc7\u6599\u96c6\uff0c\u8a73\u7d30\u8cc7\u8a0a\u53ef\u4ee5\u53c3\u8003 \u9019\u88e1 \u3002 import pandas as pd raw_df = pd . read_csv ( 'https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv' ) X = raw_df . drop ( columns = [ 'Class' ]) y = raw_df [ 'Class' ] print ( 'X:' , X . shape ) print ( 'Y:' , y . 
shape ) \u8f09\u5165\u6210\u529f\u5f8c\u6211\u5011\u53ef\u4ee5\u770b\u5230\u8a72\u8cc7\u6599\u96c6\u5171\u6709 284807 \u7b46\u8cc7\u6599\uff0c\u6bcf\u4e00\u7b46\u8cc7\u6599\u6709 30 \u500b\u7279\u5fb5\u3002 X: (284807, 30) Y: (284807,) \u70ba\u4e86\u65b9\u4fbf\u6aa2\u8996\u5be6\u9a57\u7d50\u679c\uff0c\u6211\u5011\u4f9d\u7167 y \u7684\u6bd4\u4f8b\u9032\u884c\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u5207\u5272\u3002\u9019\u88e1\u503c\u5f97\u4e00\u63d0\u7684\u662f\uff0c stratify \u70ba\u5206\u5c64\u96a8\u6a5f\u62bd\u6a23\u3002\u7279\u5225\u662f\u5728\u539f\u59cb\u6578\u64da\u4e2d\u6a23\u672c\u6a19\u7c64\u5206\u4f48\u4e0d\u5747\u8861\u6642\u975e\u5e38\u6709\u7528\uff0c\u4e00\u4e9b\u5206\u985e\u554f\u984c\u53ef\u80fd\u6703\u5728\u76ee\u6a19\u985e\u7684\u5206\u4f48\u4e2d\u8868\u73fe\u51fa\u5f88\u5927\u7684\u4e0d\u5e73\u8861\u6642\u4f8b\u5982\uff1a\u8ca0\u6a23\u672c\u53ef\u80fd\u6bd4\u6b63\u6a23\u672c\u591a\u5e7e\u500d\u3002\u5728\u9019\u7a2e\u60c5\u6cc1\u4e0b\uff0c\u5efa\u8b70\u4f7f\u7528\u5206\u5c64\u62bd\u6a23\u3002 from sklearn.model_selection import train_test_split X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.3 , random_state = 42 , stratify = y ) print ( 'X_train:' , X_train . shape ) print ( 'X_test:' , X_test . 
shape ) \u8f38\u51fa\u7d50\u679c\uff1a X_train: (199364, 30) X_test: (85443, 30) \u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7d93\u7531 7:3 \u7684\u6bd4\u4f8b\u4e0b\u53bb\u96a8\u6a5f\u5207\u5272\u8cc7\u6599\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e Pandas \u505a\u66f4\u8fd1\u4e00\u6b65\u7684\u5206\u6790\uff0c\u53ef\u4ee5\u767c\u73fe\u5207\u5272\u51fa\u4f86\u7684\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u5728\u76dc\u5237(1)\u8207\u975e\u76dc\u5237(0)\u7684\u8cc7\u6599\u6bd4\u4f8b\u662f\u5dee\u4e0d\u591a\u7684\u3002 \u63a5\u4e0b\u4f86\u91cd\u982d\u6232\u51fa\u5834\u3002\u6211\u5011\u63a1\u7528 LightGBM \u5206\u985e\u5668\uff0c\u82e5\u9084\u6c92\u5b89\u88dd\u7684\u8b80\u8005\u53ef\u4ee5\u53c3\u8003\u4ee5\u4e0b\u6307\u4ee4\u9032\u884c\u5b89\u88dd\u3002 pip install lightgbm \u5b89\u88dd\u7d50\u675f\u5f8c\u5373\u53ef\u8f09\u5165 lightgbm \u5957\u4ef6\u4e26\u9078\u7528 LGBMClassifier \u5206\u985e\u5668\u3002\u53e6\u5916\u6211\u5011\u53ef\u4ee5\u5728\u5efa\u7acb\u5206\u985e\u5668\u540c\u6642\u8a2d\u5b9a\u6a21\u578b\u8d85\u53c3\u6578\uff0c\u9019\u88e1\u6211\u5011\u4f86\u793a\u7bc4\u4f7f\u7528 is_unbalance=True \u8a13\u7df4\u6a21\u578b\u3002\u9664\u6b64\u4e4b\u5916\u6a21\u578b\u7684\u8d85\u53c3\u6578\u6709\u5f88\u591a\uff0c\u53ef\u4ee5\u7531 \u5b98\u65b9 \u6587\u4ef6\u4e2d\u67e5\u95b1\u3002\u4ee5\u4e0b\u5e6b\u5404\u4f4d\u6574\u7406\u5e38\u7528\u7684\u65b9\u6cd5\uff1a Parameters: - num_iterations: \u7e3d\u5171\u8fed\u4ee3\u7684\u6b21\u6578\uff0c\u5373\u6c7a\u7b56\u6a39\u7684\u500b\u6578\u3002\u9810\u8a2d\u503c\u70ba100\u3002 - learning_rate: \u5b78\u7fd2\u901f\u7387\uff0c\u9810\u8a2d0.1\u3002 - boosting: \u9078\u64c7 boosting \u7a2e\u985e\u3002\u5171\u56db\u7a2e gbdt\u3001rf\u3001dart\u3001goss\uff0c\u9810\u8a2d\u70ba gbdt\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\uff0c\u9810\u8a2d\u503c\u70ba-1\u5373\u8868\u793a\u7121\u9650\u5236\u3002 - min_data_in_leaf: 
\u4e00\u500b\u5b50\u8449\u4e2d\u6700\u5c11\u6578\u64da\uff0c\u53ef\u7528\u65bc\u8655\u7406\u904e\u64ec\u5408\u3002\u9810\u8a2d20\u7b46\u3002 - max_bin: \u5c07\u7279\u5fb5\u503c\u653e\u5165\u6876\u4e2d\u7684\u6700\u5927bins\u6578\u3002\u9810\u8a2d\u70ba255\u3002 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 import lightgbm as lgb model = lgb . LGBMClassifier ( is_unbalance = True ) model . fit ( X_train , y_train ) \u8a13\u7df4\u7d50\u675f\u5f8c\u5373\u53ef\u4f7f\u7528\u525b\u5207\u5272\u597d\u7684\u6e2c\u8a66\u96c6\u9032\u884c\u6a21\u578b\u8a55\u4f30\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u6e96\u78ba\u7387\u9ad8\u9054 94%\u3002 from sklearn.metrics import accuracy_score pred = model . predict ( X_test ) print ( \"Accuracy:\" , accuracy_score ( y_test , pred )) \u8f38\u51fa\u7d50\u679c\uff1a Accuracy: 0.9401706400758401 \u5982\u679c\u8981\u5224\u65b7\u5206\u985e\u5668\u7684\u597d\u58de\uff0c\u50c5\u4f7f\u7528\u6e96\u78ba\u7387\u4f86\u8a55\u4f30\u662f\u4e00\u500b\u4e0d\u597d\u7684\u7fd2\u6163\u3002\u6211\u5011\u61c9\u8a72\u5584\u7528\u6df7\u6dc6\u77e9\u9663\u505a\u66f4\u8fd1\u4e00\u6b65\u7684\u5206\u6790\uff0c\u4e26\u67e5\u770b\u6b63\u6a23\u672c\u8207\u8ca0\u6a23\u672c\u5728\u9810\u6e2c\u4e0a\u7684\u8868\u73fe\u3002\u9996\u5148\u6211\u5011\u5148\u4f86\u5beb\u4e00\u500b\u8a08\u7b97\u6df7\u6dc6\u77e9\u9663\u7684\u51fd\u5f0f\uff0c\u4e26\u7528 seaborn \u7e6a\u88fd\u51fa\u71b1\u529b\u5716\u77e9\u9663\u3002 import seaborn as sns import matplotlib.pyplot as plt def plot_confusion_matrix ( actual_val , pred_val , title = None ): confusion_matrix = pd . 
crosstab ( actual_val , pred_val , rownames = [ 'Actual' ], colnames = [ 'Predicted' ]) plot = sns . heatmap ( confusion_matrix , annot = True , fmt = ',.0f' ) if title is None : pass else : plot . set_title ( title ) plt . show () \u5728\u8a55\u4f30\u6a21\u578b\u4e4b\u524d\u6211\u5011\u5148\u4f86\u67e5\u770b\u6e2c\u8a66\u96c6\u8f38\u51fa y \u7684\u5206\u4f48\u5404\u662f\u591a\u5c11\u3002\u900f\u904e numpy \u7684 unique \u65b9\u6cd5\u53ef\u4ee5\u8a08\u7b97 y_test \u4e2d\u6bcf\u500b\u985e\u5225\u7684\u6578\u91cf\u3002\u5f9e\u8f38\u51fa\u7d50\u679c\u53ef\u4ee5\u5f97\u77e5\uff0c85443 \u7b46\u6e2c\u8a66\u96c6\u4e2d\u5171\u6709 85295 \u7b46\u662f\u6a19\u7c64 0(\u672a\u76dc\u5237)\u3001148 \u7b46\u662f\u6a19\u7c64 1(\u76dc\u5237)\u3002\u77e5\u9053\u9019\u4e9b\u771f\u5be6\u6578\u64da\u7684\u6578\u91cf\u5f8c\uff0c\u63a5\u4e0b\u4f86\u6211\u5011\u5c31\u53ef\u4ee5\u900f\u904e\u6df7\u6dc6\u77e9\u9663\u4f86\u67e5\u770b\u6a21\u578b\u662f\u5426\u6709\u5c07\u9019\u4e9b\u76dc\u5237\u7684\u8cc7\u6599\u88ab\u6b63\u78ba\u9810\u6e2c\u51fa\u4f86\u3002 import numpy as np unique , counts = np . 
unique ( y_test , return_counts = True ) dict ( zip ( unique , counts )) \u8f38\u51fa\u7d50\u679c\uff1a { 0 : 85295 , 1 : 148 } plot_confusion_matrix \u51fd\u5f0f\u5efa\u7acb\u5b8c\u6210\u5f8c\u5373\u53ef\u547c\u53eb\u3002\u6b64\u51fd\u5f0f\u6709\u4e09\u500b\u8f38\u5165\uff0c\u5206\u5225\u70ba y_test \u5be6\u969b\u8f38\u51fa\u7b54\u6848\u3001 pred \u6a21\u578b\u9810\u6e2c\u7d50\u679c\u3001title \u5716\u8868\u6a19\u984c(\u9810\u8a2dNone)\u3002\u76f8\u5c0d\u61c9\u7684\u8b8a\u6578\u8f38\u5165\u5f8c\u5373\u53ef\u5f97\u5230\u8a08\u7b97\u597d\u7684\u6df7\u6dc6\u77e9\u9663\u3002 plot_confusion_matrix ( y_test , pred ) \u4e0b\u5716\u70ba\u5be6\u969b is_unbalance=True \u7684\u8a13\u7df4\u7d50\u679c\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u5728\u6e2c\u8a66\u96c6\u4e2d\u6709 148 \u7b46\u76dc\u5237\u8cc7\u6599\uff0c\u5176\u4e2d\u6709 124 \u7b46\u76dc\u5237\u88ab\u6210\u529f\u8fa8\u8b58\u51fa\u4f86\u3002\u53e6\u5916\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u771f\u5be6\u7b54\u6848\u662f\u6c92\u76dc\u5237\u7684\u8cc7\u6599\u5c45\u7136\u6709 5088 \u7b46\u88ab\u8aa4\u5224\u6210\u76dc\u5237\u3002 \u6211\u5011\u518d\u4f86\u8a66\u8a66\u5c07 is_unbalance \u8a2d\u70ba False \u4e26\u89c0\u5bdf\u6df7\u6dc6\u77e9\u9663\u3002\u53ef\u4ee5\u767c\u73fe\u96d6\u7136\u8aa4\u5224\u7684\u6578\u91cf\u6e1b\u5c11\u4e86\uff0c\u4f46\u662f\u771f\u5be6\u7b54\u6848\u4e2d\u6709 148 \u7b46\u76dc\u5237\u8cc7\u6599\u50c5\u6709 88 \u7b46\u88ab\u6210\u529f\u8fa8\u8b58\u51fa\u4f86\u3002\u6211\u5011\u53ef\u4ee5\u731c\u60f3\u6a21\u578b\u5728\u5927\u591a\u6578\u72c0\u6cc1\u90fd\u6703\u9810\u6e2c\u8cc7\u6599\u672a\u88ab\u76dc\u5237\u7684\u6a5f\u7387\u8f03\u5927\u3002 Reference \u7d42\u65bc\u6709\u4eba\u628aXGBoost \u548c LightGBM \u8b1b\u660e\u767d\u4e86\uff0c\u9805\u76ee\u4e2d\u6700\u4e3b\u6d41\u7684\u96c6\u6210\u6f14\u7b97\u6cd5\uff01 Lightgbm\u57fa\u672c\u539f\u7406\u4ecb\u7d39 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub 
\u53d6\u5f97\uff01","title":"[Day 17] \u8f15\u91cf\u5316\u7684\u68af\u5ea6\u63d0\u5347\u6a5f - LightGBM"},{"location":"17.LightGBM/#day-17-lightgbm","text":"","title":"[Day 17] \u8f15\u91cf\u5316\u7684\u68af\u5ea6\u63d0\u5347\u6a5f - LightGBM"},{"location":"17.LightGBM/#_1","text":"LightGBM \u8207 XGBoost \u6bd4\u8f03 \u4e86\u89e3 LightGBM \u512a\u9ede \u5be6\u4f5c LightGBM \u8655\u7406\u8cc7\u6599\u4e0d\u5e73\u8861\u8cc7\u6599 \u4fe1\u7528\u5361\u76dc\u5237\u5075\u6e2c (\u4e8c\u5143\u5206\u985e) \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"17.LightGBM/#_2","text":"LightGBM \u662f\u5c6c\u65bc GBDT \u5bb6\u65cf\u4e2d\u6210\u54e1\u4e4b\u4e00\uff0c\u76f8\u8f03\u65bc\u5148\u524d\u4ecb\u7d39\u7684 XGBoost \u5169\u8005\u53ef\u4ee5\u62ff\u4f86\u505a\u6bd4\u8f03\u3002\u7c21\u55ae\u4f86\u8aaa\u5f9e LightGBM \u540d\u5b57\u4e0a\u89c0\u5bdf\uff0c\u6211\u5011\u53ef\u4ee5\u770b\u51fa\u5b83\u662f\u8f15\u91cf\u5316 (Light) \u7684\u68af\u5ea6\u63d0\u5347\u6a5f (GBM) \u7684\u5be6\u4f8b\u3002\u5176\u76f8\u5c0d XGBoost \u4f86\u8aaa\u5b83\u5177\u6709\u8a13\u7df4\u901f\u5ea6\u5feb\u3001\u8a18\u61b6\u9ad4\u4f54\u7528\u4f4e\u7684\u7279\u9ede\uff0c\u56e0\u6b64\u8fd1\u5e7e\u5e74 LightGBM \u5728 Kaggle \u4e0a\u4e5f\u7b97\u662f\u71b1\u9580\u6a21\u578b\u4e00\u3002","title":"\u524d\u8a00"},{"location":"17.LightGBM/#lightgbm-xgboost","text":"\u9019\u5169\u7a2e\u6f14\u7b97\u6cd5\u90fd\u4f7f\u7528\u8caa\u5a6a\u7684\u65b9\u6cd5\u4f86\u6700\u5c0f\u5316\u640d\u5931\u51fd\u6578\u7684\u68af\u5ea6\u4f86\u69cb\u5efa\u6240\u6709\u7684\u5f31\u5b78\u7fd2\u5668\u3002\u5176 tree-based \u6f14\u7b97\u6cd5\u6240\u9762\u81e8\u7684\u6311\u6230\u662f\u5982\u4f55\u6311\u9078\u6700\u4f73\u7684\u8449\u7bc0\u9ede\u7684\u5207\u5272\u65b9\u5f0f\uff0c\u7136\u800c LightGBM \u548c XGBoost \u5206\u5225\u4f7f\u7528\u4e0d\u540c\u7684\u512a\u5316\u6280\u8853\u8207\u65b9\u6cd5\u4f86\u8b58\u5225\u6700\u4f73\u7684\u5206\u5272\u9ede\u3002","title":"LightGBM \u8207 XGBoost 
\u6bd4\u8f03"},{"location":"17.LightGBM/#lightgbm","text":"LightGBM \u7531\u5fae\u8edf\u5718\u968a\u65bc 2017 \u5e74\u6240\u767c\u8868\u7684\u8ad6\u6587 LightGBM: A Highly Efficient Gradient Boosting Decision Tree \u88ab\u63d0\u51fa\u3002\u5176\u4e3b\u8981\u60f3\u6cd5\u662f\u5229\u7528\u6c7a\u7b56\u6a39\u70ba\u57fa\u5e95\u7684\u5f31\u5b78\u7fd2\u5668\uff0c\u4e0d\u65b7\u5730\u8fed\u4ee3\u8a13\u7df4\u4e26\u53d6\u5f97\u6700\u4f73\u7684\u6a21\u578b\u3002\u540c\u6642\u8a72\u6f14\u7b97\u6cd5\u9032\u884c\u4e86\u512a\u5316\u4f7f\u5f97\u8a13\u7df4\u901f\u5ea6\u8b8a\u5feb\uff0c\u4e26\u4e14\u6709\u6548\u964d\u88ab\u6d88\u8017\u7684\u8cc7\u6e90\u3002LightGBM \u4e5f\u662f\u500b\u958b\u6e90\u5c08\u6848\u5927\u5bb6\u53ef\u4ee5\u5728 GitHub \u4e0a\u53ef\u4ee5\u53d6\u5f97\u76f8\u95dc\u8cc7\u8a0a\u3002 \u5728\u5b98\u65b9\u7684\u6587\u4ef6\u4e2d\u4e5f\u689d\u5217\u4e86\u5e7e\u500b LightGBM \u7684\u512a\u9ede\uff1a - \u66f4\u5feb\u7684\u8a13\u7df4\u901f\u5ea6\u548c\u66f4\u9ad8\u7684\u6548\u7387 - \u4f4e\u8a18\u61b6\u9ad4\u4f7f\u7528\u7387 - \u66f4\u597d\u7684\u6e96\u78ba\u5ea6 - \u652f\u63f4 GPU \u5e73\u884c\u904b\u7b97 - \u80fd\u5920\u8655\u7406\u5927\u898f\u6a21\u6578\u64da LightGBM \u4f7f\u7528 leaf-wise tree \u6f14\u7b97\u6cd5\uff0c\u56e0\u6b64\u5728\u8fed\u4ee3\u904e\u7a0b\u4e2d\u80fd\u66f4\u5feb\u5730\u6536\u6582\u3002\u4f46\u662f leaf-wise tree \u65b9\u6cd5\u8f03\u5bb9\u6613\u904e\u64ec\u5408\u3002\u8a73\u7d30\u7684\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u6587\u7ae0\u6700\u5f8c\u63d0\u4f9b\u7684\u76f8\u95dc\u8cc7\u6e90\u3002","title":"LightGBM \u512a\u9ede"},{"location":"17.LightGBM/#unbalance","text":"\u5728\u4f7f\u7528 LightGBM \u505a\u5206\u985e\u5668\u6642\u8a72\u5982\u4f55\u8655\u7406\u6a23\u672c\u985e\u5225\u5206\u4f48\u4e0d\u5e73\u8861\u7684\u554f\u984c\uff1f\u4e00\u500b\u7c21\u55ae\u7684\u65b9\u6cd5\u662f\u8a2d\u5b9a is_unbalance=True \uff0c\u6216\u662f scale_pos_weight 
\u6ce8\u610f\u9019\u5169\u500b\u53c3\u6578\u53ea\u80fd\u64c7\u4e00\u4f7f\u7528\u3002\u4ee5\u4e0b\u6211\u5011\u5c31\u4f7f\u7528\u4e00\u500b\u4e0d\u5e73\u8861\u7684\u8cc7\u6599\u96c6\uff0c\u4fe1\u7528\u5361\u76dc\u5237\u9810\u6e2c\u4f86\u505a\u793a\u7bc4\u3002\u9996\u5148\u6211\u5011\u53ef\u4ee5\u8f09\u5165 Google \u6240\u63d0\u4f9b\u7684\u4fe1\u7528\u5361\u76dc\u5237\u8cc7\u6599\u96c6\uff0c\u8a73\u7d30\u8cc7\u8a0a\u53ef\u4ee5\u53c3\u8003 \u9019\u88e1 \u3002 import pandas as pd raw_df = pd . read_csv ( 'https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv' ) X = raw_df . drop ( columns = [ 'Class' ]) y = raw_df [ 'Class' ] print ( 'X:' , X . shape ) print ( 'Y:' , y . shape ) \u8f09\u5165\u6210\u529f\u5f8c\u6211\u5011\u53ef\u4ee5\u770b\u5230\u8a72\u8cc7\u6599\u96c6\u5171\u6709 284807 \u7b46\u8cc7\u6599\uff0c\u6bcf\u4e00\u7b46\u8cc7\u6599\u6709 30 \u500b\u7279\u5fb5\u3002 X: (284807, 30) Y: (284807,) \u70ba\u4e86\u65b9\u4fbf\u6aa2\u8996\u5be6\u9a57\u7d50\u679c\uff0c\u6211\u5011\u4f9d\u7167 y \u7684\u6bd4\u4f8b\u9032\u884c\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u5207\u5272\u3002\u9019\u88e1\u503c\u5f97\u4e00\u63d0\u7684\u662f\uff0c stratify \u70ba\u5206\u5c64\u96a8\u6a5f\u62bd\u6a23\u3002\u7279\u5225\u662f\u5728\u539f\u59cb\u6578\u64da\u4e2d\u6a23\u672c\u6a19\u7c64\u5206\u4f48\u4e0d\u5747\u8861\u6642\u975e\u5e38\u6709\u7528\uff0c\u4e00\u4e9b\u5206\u985e\u554f\u984c\u53ef\u80fd\u6703\u5728\u76ee\u6a19\u985e\u7684\u5206\u4f48\u4e2d\u8868\u73fe\u51fa\u5f88\u5927\u7684\u4e0d\u5e73\u8861\u6642\u4f8b\u5982\uff1a\u8ca0\u6a23\u672c\u53ef\u80fd\u6bd4\u6b63\u6a23\u672c\u591a\u5e7e\u500d\u3002\u5728\u9019\u7a2e\u60c5\u6cc1\u4e0b\uff0c\u5efa\u8b70\u4f7f\u7528\u5206\u5c64\u62bd\u6a23\u3002 from sklearn.model_selection import train_test_split X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.3 , random_state = 42 , stratify = y ) print ( 'X_train:' , X_train . shape ) print ( 'X_test:' , X_test . 
shape ) \u8f38\u51fa\u7d50\u679c\uff1a X_train: (199364, 30) X_test: (85443, 30) \u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7d93\u7531 7:3 \u7684\u6bd4\u4f8b\u4e0b\u53bb\u96a8\u6a5f\u5207\u5272\u8cc7\u6599\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e Pandas \u505a\u66f4\u8fd1\u4e00\u6b65\u7684\u5206\u6790\uff0c\u53ef\u4ee5\u767c\u73fe\u5207\u5272\u51fa\u4f86\u7684\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u5728\u76dc\u5237(1)\u8207\u975e\u76dc\u5237(0)\u7684\u8cc7\u6599\u6bd4\u4f8b\u662f\u5dee\u4e0d\u591a\u7684\u3002 \u63a5\u4e0b\u4f86\u91cd\u982d\u6232\u51fa\u5834\u3002\u6211\u5011\u63a1\u7528 LightGBM \u5206\u985e\u5668\uff0c\u82e5\u9084\u6c92\u5b89\u88dd\u7684\u8b80\u8005\u53ef\u4ee5\u53c3\u8003\u4ee5\u4e0b\u6307\u4ee4\u9032\u884c\u5b89\u88dd\u3002 pip install lightgbm \u5b89\u88dd\u7d50\u675f\u5f8c\u5373\u53ef\u8f09\u5165 lightgbm \u5957\u4ef6\u4e26\u9078\u7528 LGBMClassifier \u5206\u985e\u5668\u3002\u53e6\u5916\u6211\u5011\u53ef\u4ee5\u5728\u5efa\u7acb\u5206\u985e\u5668\u540c\u6642\u8a2d\u5b9a\u6a21\u578b\u8d85\u53c3\u6578\uff0c\u9019\u88e1\u6211\u5011\u4f86\u793a\u7bc4\u4f7f\u7528 is_unbalance=True \u8a13\u7df4\u6a21\u578b\u3002\u9664\u6b64\u4e4b\u5916\u6a21\u578b\u7684\u8d85\u53c3\u6578\u6709\u5f88\u591a\uff0c\u53ef\u4ee5\u7531 \u5b98\u65b9 \u6587\u4ef6\u4e2d\u67e5\u95b1\u3002\u4ee5\u4e0b\u5e6b\u5404\u4f4d\u6574\u7406\u5e38\u7528\u7684\u65b9\u6cd5\uff1a Parameters: - num_iterations: \u7e3d\u5171\u8fed\u4ee3\u7684\u6b21\u6578\uff0c\u5373\u6c7a\u7b56\u6a39\u7684\u500b\u6578\u3002\u9810\u8a2d\u503c\u70ba100\u3002 - learning_rate: \u5b78\u7fd2\u901f\u7387\uff0c\u9810\u8a2d0.1\u3002 - boosting: \u9078\u64c7 boosting \u7a2e\u985e\u3002\u5171\u56db\u7a2e gbdt\u3001rf\u3001dart\u3001goss\uff0c\u9810\u8a2d\u70ba gbdt\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\uff0c\u9810\u8a2d\u503c\u70ba-1\u5373\u8868\u793a\u7121\u9650\u5236\u3002 - min_data_in_leaf: 
\u4e00\u500b\u5b50\u8449\u4e2d\u6700\u5c11\u6578\u64da\uff0c\u53ef\u7528\u65bc\u8655\u7406\u904e\u64ec\u5408\u3002\u9810\u8a2d20\u7b46\u3002 - max_bin: \u5c07\u7279\u5fb5\u503c\u653e\u5165\u6876\u4e2d\u7684\u6700\u5927bins\u6578\u3002\u9810\u8a2d\u70ba255\u3002 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 import lightgbm as lgb model = lgb . LGBMClassifier ( is_unbalance = True ) model . fit ( X_train , y_train ) \u8a13\u7df4\u7d50\u675f\u5f8c\u5373\u53ef\u4f7f\u7528\u525b\u5207\u5272\u597d\u7684\u6e2c\u8a66\u96c6\u9032\u884c\u6a21\u578b\u8a55\u4f30\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u6e96\u78ba\u7387\u9ad8\u9054 94%\u3002 from sklearn.metrics import accuracy_score pred = model . predict ( X_test ) print ( \"Accuracy:\" , accuracy_score ( y_test , pred )) \u8f38\u51fa\u7d50\u679c\uff1a Accuracy: 0.9401706400758401 \u5982\u679c\u8981\u5224\u65b7\u5206\u985e\u5668\u7684\u597d\u58de\uff0c\u50c5\u4f7f\u7528\u6e96\u78ba\u7387\u4f86\u8a55\u4f30\u662f\u4e00\u500b\u4e0d\u597d\u7684\u7fd2\u6163\u3002\u6211\u5011\u61c9\u8a72\u5584\u7528\u6df7\u6dc6\u77e9\u9663\u505a\u66f4\u8fd1\u4e00\u6b65\u7684\u5206\u6790\uff0c\u4e26\u67e5\u770b\u6b63\u6a23\u672c\u8207\u8ca0\u6a23\u672c\u5728\u9810\u6e2c\u4e0a\u7684\u8868\u73fe\u3002\u9996\u5148\u6211\u5011\u5148\u4f86\u5beb\u4e00\u500b\u8a08\u7b97\u6df7\u6dc6\u77e9\u9663\u7684\u51fd\u5f0f\uff0c\u4e26\u7528 seaborn \u7e6a\u88fd\u51fa\u71b1\u529b\u5716\u77e9\u9663\u3002 import seaborn as sns import matplotlib.pyplot as plt def plot_confusion_matrix ( actual_val , pred_val , title = None ): confusion_matrix = pd . 
crosstab ( actual_val , pred_val , rownames = [ 'Actual' ], colnames = [ 'Predicted' ]) plot = sns . heatmap ( confusion_matrix , annot = True , fmt = ',.0f' ) if title is None : pass else : plot . set_title ( title ) plt . show () \u5728\u8a55\u4f30\u6a21\u578b\u4e4b\u524d\u6211\u5011\u5148\u4f86\u67e5\u770b\u6e2c\u8a66\u96c6\u8f38\u51fa y \u7684\u5206\u4f48\u5404\u662f\u591a\u5c11\u3002\u900f\u904e numpy \u7684 unique \u65b9\u6cd5\u53ef\u4ee5\u8a08\u7b97 y_test \u4e2d\u6bcf\u500b\u985e\u5225\u7684\u6578\u91cf\u3002\u5f9e\u8f38\u51fa\u7d50\u679c\u53ef\u4ee5\u5f97\u77e5\uff0c85443 \u7b46\u6e2c\u8a66\u96c6\u4e2d\u5171\u6709 85295 \u7b46\u662f\u6a19\u7c64 0(\u672a\u76dc\u5237)\u3001148 \u7b46\u662f\u6a19\u7c64 1(\u76dc\u5237)\u3002\u77e5\u9053\u9019\u4e9b\u771f\u5be6\u6578\u64da\u7684\u6578\u91cf\u5f8c\uff0c\u63a5\u4e0b\u4f86\u6211\u5011\u5c31\u53ef\u4ee5\u900f\u904e\u6df7\u6dc6\u77e9\u9663\u4f86\u67e5\u770b\u6a21\u578b\u662f\u5426\u6709\u5c07\u9019\u4e9b\u76dc\u5237\u7684\u8cc7\u6599\u88ab\u6b63\u78ba\u9810\u6e2c\u51fa\u4f86\u3002 import numpy as np unique , counts = np . 
unique ( y_test , return_counts = True ) dict ( zip ( unique , counts )) \u8f38\u51fa\u7d50\u679c\uff1a { 0 : 85295 , 1 : 148 } plot_confusion_matrix \u51fd\u5f0f\u5efa\u7acb\u5b8c\u6210\u5f8c\u5373\u53ef\u547c\u53eb\u3002\u6b64\u51fd\u5f0f\u6709\u4e09\u500b\u8f38\u5165\uff0c\u5206\u5225\u70ba y_test \u5be6\u969b\u8f38\u51fa\u7b54\u6848\u3001 pred \u6a21\u578b\u9810\u6e2c\u7d50\u679c\u3001title \u5716\u8868\u6a19\u984c(\u9810\u8a2dNone)\u3002\u76f8\u5c0d\u61c9\u7684\u8b8a\u6578\u8f38\u5165\u5f8c\u5373\u53ef\u5f97\u5230\u8a08\u7b97\u597d\u7684\u6df7\u6dc6\u77e9\u9663\u3002 plot_confusion_matrix ( y_test , pred ) \u4e0b\u5716\u70ba\u5be6\u969b is_unbalance=True \u7684\u8a13\u7df4\u7d50\u679c\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u5728\u6e2c\u8a66\u96c6\u4e2d\u6709 148 \u7b46\u76dc\u5237\u8cc7\u6599\uff0c\u5176\u4e2d\u6709 124 \u7b46\u76dc\u5237\u88ab\u6210\u529f\u8fa8\u8b58\u51fa\u4f86\u3002\u53e6\u5916\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u771f\u5be6\u7b54\u6848\u662f\u6c92\u76dc\u5237\u7684\u8cc7\u6599\u5c45\u7136\u6709 5088 \u7b46\u88ab\u8aa4\u5224\u6210\u76dc\u5237\u3002 \u6211\u5011\u518d\u4f86\u8a66\u8a66\u5c07 is_unbalance \u8a2d\u70ba False \u4e26\u89c0\u5bdf\u6df7\u6dc6\u77e9\u9663\u3002\u53ef\u4ee5\u767c\u73fe\u96d6\u7136\u8aa4\u5224\u7684\u6578\u91cf\u6e1b\u5c11\u4e86\uff0c\u4f46\u662f\u771f\u5be6\u7b54\u6848\u4e2d\u6709 148 \u7b46\u76dc\u5237\u8cc7\u6599\u50c5\u6709 88 \u7b46\u88ab\u6210\u529f\u8fa8\u8b58\u51fa\u4f86\u3002\u6211\u5011\u53ef\u4ee5\u731c\u60f3\u6a21\u578b\u5728\u5927\u591a\u6578\u72c0\u6cc1\u90fd\u6703\u9810\u6e2c\u8cc7\u6599\u672a\u88ab\u76dc\u5237\u7684\u6a5f\u7387\u8f03\u5927\u3002","title":"\u8655\u7406 unbalance \u8cc7\u6599"},{"location":"17.LightGBM/#reference","text":"\u7d42\u65bc\u6709\u4eba\u628aXGBoost \u548c LightGBM \u8b1b\u660e\u767d\u4e86\uff0c\u9805\u76ee\u4e2d\u6700\u4e3b\u6d41\u7684\u96c6\u6210\u6f14\u7b97\u6cd5\uff01 Lightgbm\u57fa\u672c\u539f\u7406\u4ecb\u7d39 
\u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"18.CatBoost/","text":"[Day 18] \u6a5f\u5668\u5b78\u7fd2 boosting \u795e\u5668 - CatBoost \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u4e86\u89e3 CatBoost \u6a21\u578b \u5be6\u4f5c CatBoost \u8ff4\u6b78\u6a21\u578b-\u623f\u50f9\u9810\u6e2c \u6a21\u578b\u8a13\u7df4\u3001\u7279\u5fb5\u7be9\u9078 \u8d85\u53c3\u6578\u641c\u7d22 \u81ea\u52d5\u8655\u7406\u985e\u5225\u578b\u7684\u7279\u5fb5 \u53ef\u89e3\u91cb\u5316\u6a21\u578b \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 CatBoost \u540c\u6a23\u662f\u57fa\u65bc Gradient Boosting Tree \u7684\u68af\u5ea6\u63d0\u5347\u6a39\u6a21\u578b\u6846\u67b6\uff0c\u6700\u5927\u7684\u7279\u9ede\u5c0d\u985e\u5225\u7279\u5fb5\u7684\u76f4\u63a5\u652f\u63f4\uff0c\u751a\u81f3\u5141\u8a31\u5b57\u4e32\u985e\u578b\u7684\u7279\u5fb5\u9032\u884c\u6a21\u578b\u8a13\u7df4\u3002\u8fd1\u5e74\u4f86\u5728 Kaggle \u4e0a\u7684\u6bd4\u8cfd\u9678\u7e8c\u6709\u4eba\u4f7f\u7528 CatBoost \u65b9\u6cd5\u4e26\u53d6\u5f97\u4e0d\u932f\u7684\u6210\u7e3e\uff0c\u65bc\u662f\u5c31\u4f86\u64b0\u5beb\u6587\u7ae0\u9806\u4fbf\u4f86\u77a7\u77a7\u5b83\u8207\u5176\u4ed6 Boosting \u6f14\u7b97\u6cd5\u4e0d\u540c\u4e4b\u8655\u3002\u5176\u4e2d\u6700\u7279\u5225\u7684\u5730\u65b9\u662f CatBoost \u80fd\u5920\u8655\u7406\u975e\u6578\u503c\u578b\u614b\u7684\u8cc7\u6599\uff0c\u4e5f\u5c31\u662f\u8aac\u7121\u9700\u5c0d\u6578\u64da\u7279\u5fb5\u9032\u884c\u4efb\u4f55\u7684\u9810\u8655\u7406\u5c31\u53ef\u4ee5\u5c07\u985e\u5225\u8f49\u63db\u7232\u6578\u5b57\u3002CatBoost \u63a1\u7528\u6c7a\u7b56\u6a39\u68af\u5ea6\u63d0\u5347\u65b9\u6cd5\u4e26\u5ba3\u7a31\u5728\u6548\u80fd\u4e0a\u6bd4 XGBoost \u548c LightGBM \u66f4\u52a0\u512a\u5316\uff0c\u540c\u6642\u652f\u63f4 CPU \u548c GPU \u904b\u7b97\u3002\u8207\u5176\u4ed6 Boosting \u65b9\u6cd5\u76f8\u6bd4 CatBoost 
\u662f\u4e00\u7a2e\u76f8\u5c0d\u8f03\u65b0\u7684\u958b\u6e90\u6a5f\u5668\u5b78\u7fd2\u7b97\u6cd5\u3002\u8a72\u6f14\u7b97\u6cd5\u662f\u7531\u4e00\u9593\u4fc4\u7f85\u65af\u7684\u516c\u53f8 Yandex \u65bc 2017 \u5e74\u6240\u63d0\u51fa\uff0c\u540c\u6642\u5728 arxiv \u6709\u4e00\u7bc7 CatBoost: unbiased boosting with categorical features \u7684\u8ad6\u6587\uff0c\u6587\u4e2d\u4f5c\u8005\u8a73\u7d30\u8aaa\u660e\u4e86 CatBoost \u7684\u65b9\u6cd5\u8207\u512a\u9ede\u3002 CatBoost \u512a\u9ede CatBoost \u540d\u7a31\u6e90\u65bc Category \u548c Boost \u5169\u500b\u55ae\u8a5e\uff0c\u627f\u8972 Boosting \u7684\u512a\u9ede\u4e4b\u5916\u8a72\u6f14\u7b97\u6cd5\u5728\u985e\u5225\u578b\u7684\u7279\u5fb5\u4e0a\u505a\u4e86\u4e00\u4e9b\u66f4\u516c\u5e73\u7684\u7279\u5fb5\u5de5\u7a0b\u3002\u8a13\u7df4\u904e\u7a0b\u4e2d\u5141\u8a31\u6c92\u6709\u7de8\u78bc\u7684\u985e\u5225\u7279\u5fb5\uff0c\u900f\u904e\u5206\u985e\u548c\u6578\u5b57\u7279\u5fb5\u7d44\u5408\u7684\u5404\u7a2e\u7d71\u8a08\u91cf\u70ba\u985e\u5225\u578b\u7684\u7279\u5fb5\u505a\u7de8\u78bc\u3002\u4e0d\u904e\u5728\u8a13\u7df4\u524d\u5fc5\u9808\u78ba\u4fdd\u8a72\u7279\u5fb5\u4e2d\u7121\u7f3a\u5931\u503c\u3002\u5176\u8a13\u7df4\u8cc7\u6599\u82e5\u6709\u7f3a\u5931\u503c CatBoost \u9810\u8a2d\u6703\u5c07\u6578\u503c\u578b\u7684\u8cc7\u6599\u88dc\u4e0a\u6700\u5c0f\u503c\uff0c\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5 \u53c3\u8003 \u3002\u53e6\u5916\u5c0d\u65bc GPU \u7684\u4f7f\u7528\u8005\uff0c\u5b83\u4e5f\u80fd\u8655\u7406\u5b57\u4e32(\u985e\u5225)\u578b\u614b\u7684\u7279\u5fb5\u3002 \u81ea\u52d5\u8655\u7406\u985e\u5225\u578b\u7684\u7279\u5fb5 \u81ea\u52d5\u8655\u7406\u7f3a\u5931\u503c \u53ef\u4ee5\u8655\u7406\u5404\u7a2e\u6578\u64da\u985e\u578b\uff0c\u5982\u97f3\u983b\u3001\u6587\u5b57\u3001\u5716\u50cf \u6e1b\u5c11\u4eba\u5de5\u8abf\u53c3\u7684\u9700\u8981\uff0c\u4e26\u964d\u4f4e\u4e86\u904e\u64ec\u5408\u7684\u6a5f\u6703 CatBoost \u5b89\u88dd CatBoost \u6f14\u7b97\u6cd5\u53ef\u4ee5\u89e3\u6c7a\u5206\u985e (CatBoostClassifier) 
\u548c\u8ff4\u6b78 (CatBoostRegressor) \u7684\u554f\u984c\u3002\u5b89\u88dd\u7684\u65b9\u5f0f\u4e5f\u975e\u5e38\u7c21\u55ae\uff0c\u4f7f\u7528 pip \u5c31\u80fd\u8f15\u9b06\u5b89\u88dd\u3002 pip install catboost CatBoost Parameters CatBoost \u57fa\u672c\u4e0a\u53ef\u4ee5\u81ea\u7531\u7684\u8b93\u6f14\u7b97\u6cd5\u53bb\u9078\u64c7\u6700\u4f73\u7684\u6a21\u578b\uff0c\u4e0d\u904e API \u9084\u662f\u63d0\u4f9b\u4e00\u4e9b\u57fa\u672c\u7684\u8d85\u53c3\u6578\u8b93\u4f7f\u7528\u8005\u624b\u52d5\u8abf\u6574\u3002 Parameters: - iterations: \u7e3d\u5171\u8fed\u4ee3\u7684\u6b21\u6578\uff0c\u5373\u6c7a\u7b56\u6a39\u7684\u500b\u6578\u3002\u9810\u8a2d\u503c\u70ba 1000\u3002 - use_best_model: \u8a2d\u5b9a True \u6642\u5fc5\u9808\u7d66\u5b9a\u9a57\u8b49\u96c6\uff0c\u5c07\u6703\u7559\u4e0b\u9a57\u8b49\u96c6\u5206\u4e2d\u6578\u6700\u9ad8\u7684\u6a21\u578b\u3002 - early_stopping_rounds: \u9023\u7e8c\u8a13\u7df4N\u4ee3\uff0c\u82e5\u7d50\u679c\u672a\u6539\u5584\u5247\u63d0\u65e9\u505c\u6b62\u8a13\u7df4\u3002 - od_type: IncToDec/Iter\uff0c\u9810\u8a2d Iter \u9632\u6b62 Overfitting \u8a55\u4f30\u65b9\u5f0f\uff0c\u82e5\u8a2d\u5b9a\u524d\u8005\u9700\u8981\u8a2d\u5b9a\u95a5\u503c\u3002 - eval_metric: \u6a21\u578b\u8a55\u4f30\u65b9\u5f0f\u3002 - loss_function: \u8a08\u7b97loss\u65b9\u6cd5\u3002 - verbose: True(1)/Flase(0)\uff0c\u9810\u8a2d1\u986f\u793a\u8a13\u7df4\u904e\u7a0b\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\u3002 - learning_rate: \u9810\u8a2d automatically\u3002 - depth: \u6a39\u7684\u6df1\u5ea6\uff0c\u9810\u8a2d 6\u3002 - cat_features: \u8f38\u5165\u985e\u5225\u7279\u5fb5\u7684\u7d22\u5f15\uff0c\u5b83\u6703\u81ea\u52d5\u5e6b\u4f60\u8655\u7406\u3002 \u53c3\u8003 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: 
\u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 \u5982\u679c\u9700\u8981\u624b\u52d5\u8655\u7406 Overfitting \u554f\u984c\u53ef\u4ee5\u53c3\u8003\u9019\u4efd\u5b98\u65b9 \u6587\u4ef6 \u6a21\u578b\u8a13\u7df4 \u6a21\u578b\u8a13\u7df4\u65b9\u5f0f\u57fa\u672c\u4e0a\u8207 XGBoost \u4e00\u6a23\uff0c\u5982\u679c\u4f60\u719f\u6089 sklearn \u7684\u8a71 CatBoost \u7684\u4f7f\u7528\u65b9\u5f0f\u57fa\u672c\u4e0a\u5927\u540c\u5c0f\u7570\u3002\u53ea\u4e0d\u904e\u5728 CatBoost \u4e2d\u591a\u4e86\u4e00\u4e9b\u65b9\u4fbf\u7684\u65b9\u6cd5\u548c\u53c3\u6578\u53ef\u4ee5\u4f7f\u7528\u3002\u50cf\u662f\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u53ef\u4ee5\u52a0\u4e0a plot=True \uff0c\u4e26\u5728 eval_set \u53c3\u6578\u4e2d\u63d2\u5165\u6e2c\u8a66\u96c6\u53ef\u4ee5\u5373\u6642\u770b\u5230\u8a13\u7df4\u904e\u7a0b\u7684\u8996\u89ba\u5316\u5206\u6790\u3002\u751a\u81f3\u53ef\u4ee5\u4f7f\u7528\u4ea4\u53c9\u9a57\u8b49\uff0c\u5728\u4e0d\u540c\u7684\u5206\u5272\u4e0a\u89c0\u5bdf\u6a21\u578b\u6e96\u78ba\u5ea6\u7684\u5e73\u5747\u548c\u6a19\u6e96\u504f\u5dee\u3002 from catboost import CatBoostRegressor # \u5efa\u7acb\u6a21\u578b model = CatBoostRegressor ( random_state = 42 , loss_function = 'RMSE' , eval_metric = 'RMSE' , use_best_model = True ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b model . fit ( X_train , y_train , eval_set = ( X_test , y_test ), verbose = 0 , plot = True ) \u7279\u5fb5\u7be9\u9078 \u8a13\u7df4\u904e\u7a0b\u4e2d\u6703\u81ea\u52d5\u5f9e\u8cc7\u6599\u4e2d\u7be9\u9078\u5c0d\u6a21\u578b\u9810\u6e2c\u6709\u7528\u7684\u7279\u5fb5\uff0c\u4e26\u79fb\u9664\u7121\u5e6b\u52a9\u9810\u6e2c\u7684\u7279\u5fb5\u3002 \u53c3\u8003 from catboost import CatBoostRegressor , Pool , EShapCalcType , EFeaturesSelectionAlgorithm # feature_names = ['F{}'.format(i) for i in range(X_train.shape[1])] train_pool = Pool ( X_train , y_train , feature_names = boston_dataset . feature_names . 
tolist ()) test_pool = Pool ( X_test , y_test , feature_names = boston_dataset . feature_names . tolist ()) model = CatBoostRegressor ( random_state = 42 , loss_function = 'RMSE' , eval_metric = 'RMSE' , use_best_model = True ) summary = model . select_features ( train_pool , eval_set = test_pool , features_for_select = '0-12' , num_features_to_select = 3 , steps = 2 , algorithm = EFeaturesSelectionAlgorithm . RecursiveByShapValues , shap_calc_type = EShapCalcType . Regular , train_final_model = True , logging_level = 'Silent' , plot = False ) summary \u7531\u65bc\u5728\u8a13\u7df4\u5c07 num_features_to_select \u8a2d\u70ba\u4e09\uff0c\u5373\u8868\u793a\u6a21\u578b\u8a13\u7df4\u6642\u6703\u62ff\u53d6\u4e09\u500b\u6700\u91cd\u8981\u7279\u5fb5\u7576\u4f5c\u505a\u4e2d\u6a21\u578b\u9810\u6e2c\u65b9\u5f0f\u3002\u6211\u5011\u63a1\u7528 sklearn \u7684\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6\uff0c\u7d50\u679c\u53ef\u4ee5\u767c\u73fe\u4e09\u500b\u6700\u91cd\u8981\u7279\u5fb5\u70ba ['RM', 'PTRATIO', 'LSTAT']\u3002\u5982\u679c\u4f60\u6709\u505a EDA \u53ef\u4ee5\u767c\u73fe\u9019\u4e09\u500b\u7279\u5fb5\u8207\u623f\u50f9\u7684\u95dc\u806f\u6027\u90fd\u5f88\u9ad8\u3002 {'selected_features': [5, 10, 12], 'eliminated_features_names': ['DIS', 'B', 'ZN', 'CHAS', 'RAD', 'INDUS', 'CRIM', 'AGE', 'TAX', 'NOX'], 'eliminated_features': [7, 11, 1, 3, 8, 2, 0, 6, 9, 4], 'selected_features_names': ['RM', 'PTRATIO', 'LSTAT']} Grid search \u9664\u6b64\u4e4b\u5916 CatBoost \u63d0\u4f9b\u5c0d\u6a21\u578b\u7684\u6307\u5b9a\u53c3\u6578\u503c\u9032\u884c\u7c21\u55ae\u7684\u7db2\u683c\u641c\u7d22\uff0c\u5982\u679c\u6709\u4f7f\u7528\u904e sklearn \u7684 Grid Search \u5176\u5be6\u4ed6\u5c31\u662f\u4e00\u6a23\u7684\u4f7f\u7528\u65b9\u5f0f\u3002 \u53c3\u8003 from catboost import CatBoostRegressor grid = { 'iterations' : [ 100 , 150 , 200 ], 'learning_rate' : [ 0.03 , 0.1 ], 'depth' : [ 2 , 4 , 6 , 8 ], 'l2_leaf_reg' : [ 0.2 , 0.5 , 1 , 3 ]} model = CatBoostRegressor ( random_state = 42 , loss_function 
= 'RMSE' , eval_metric = 'RMSE' ) model . grid_search ( grid , X_train , y_train ) \u81ea\u52d5\u8655\u7406\u985e\u5225\u578b\u7684\u7279\u5fb5 CatBoost \u7121\u9700\u5c0d\u6578\u64da\u7279\u5fb5\u9032\u884c\u4efb\u4f55\u7684\u9810\u8655\u7406\u5c31\u53ef\u4ee5\u5c07\u985e\u5225\u8f49\u63db\u7232\u6578\u5b57\u3002\u4e0b\u9762\u7a0b\u5f0f\u70ba\u4e00\u500b\u5206\u985e\u554f\u984c\u7684\u7bc4\u4f8b\uff0c\u5176\u4e2d\u8f38\u5165\u7279\u5fb5\u7684\u7b2c\u4e00\u500b\u70ba\u5b63\u7bc0\u3002\u5728\u6a5f\u5668\u5b78\u7fd2\u4e0a\u7684\u8a8d\u77e5\u6211\u5011\u5fc5\u9808\u5c07\u6240\u4ee5\u5b57\u4e32\u578b\u8cc7\u6599\u5fc5\u9808\u900f\u904e\u6a19\u7c64\u7de8\u78bc\u65b9\u5f0f\u8f49\u63db\u6210\u6578\u503c\uff0c\u7136\u800c\u5728 CatBoost \u5b8c\u5168\u4e0d\u9700\u8981\u3002\u50c5\u9700\u5728\u8a13\u7df4\u6a21\u578b\u6642\u7d66\u4e88\u53c3\u6578 cat_features = [0] \u5373\u4ee3\u8868\u8cc7\u6599\u7684\u7b2c\u4e00\u500b\u7279\u5fb5\u9700\u8981\u9032\u884c\u985e\u5225\u8f49\u63db\u3002\u53e6\u5916\u8f38\u51fa\u8449\u4e0d\u4e00\u5b9a\u8981\u7de8\u78bc\u5f8c\u7684\u7d50\u679c\uff0c\u4f60\u4e5f\u53ef\u4ee5\u4e1f\u5165\u6587\u5b57\u9032\u884c\u8a13\u7df4\u53ea\u8981\u52a0\u4e0a loss_function='MultiClass' \u5373\u53ef\u3002 from catboost import Pool , CatBoostClassifier train_data = [[ \"summer\" , 1924 , 44 ], [ \"summer\" , 1932 , 37 ], [ \"winter\" , 1980 , 37 ], [ \"summer\" , 2012 , 204 ]] eval_data = [[ \"winter\" , 1996 , 197 ], [ \"winter\" , 1968 , 37 ], [ \"summer\" , 2002 , 77 ], [ \"summer\" , 1948 , 59 ]] train_label = [ \"France\" , \"USA\" , \"USA\" , \"UK\" ] eval_label = [ \"USA\" , \"France\" , \"USA\" , \"UK\" ] # Initialize CatBoostClassifier model = CatBoostClassifier ( iterations = 10 , learning_rate = 1 , depth = 2 , cat_features = [ 0 ], loss_function = 'MultiClass' ) # Fit model model . fit ( train_data , train_label ) # Get predicted classes preds_class = model . predict ( eval_data ) # Get predicted probabilities for each class preds_proba = model . 
predict_proba ( eval_data ) # Get predicted RawFormulaVal preds_raw = model . predict ( eval_data , prediction_type = 'RawFormulaVal' ) \u53c3\u8003 \u5584\u7528 Verbose \u8a13\u7df4\u904e\u7a0b\u4e2d\u53ef\u4ee5\u96a8\u6642\u89c0\u5bdf\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684loss\uff0c\u4f7f\u7528verbose=10\u5373\u4ee3\u8868\u6bcf10\u6b21\u8fed\u4ee3\u6703\u986f\u793a\u4e00\u6b21\u8cc7\u8a0a\uff0c\u9019\u7a2e\u65b9\u5f0f\u4e5f\u89e3\u6c7a\u6bcf\u6b21\u758a\u4ee3\u986f\u793a\u4e00\u6b21\u7684\u56f0\u64fe\u3002\u8a13\u7df4\u904e\u7a0b\u4e2d\u5269\u9918\u6642\u9593\u4e5f\u6703\u986f\u793a\u51fa\u4f86\u3002 \u6a21\u578b\u7684\u89e3\u91cb CatBoost \u63d0\u4f9b\u4e86 plot \u53ef\u4ee5\u65b9\u4fbf\u5728\u8a13\u7df4\u6642\u67e5\u770b\u4e26\u4f5c\u5373\u6642\u5206\u6790\u8a13\u7df4\u8da8\u52e2\u3002\u9664\u6b64\u4e4b\u5916 CatBoost \u4e5f\u652f\u63f4 SHAP \u589e\u52a0\u4e86\u6a21\u578b\u53ef\u89e3\u91cb\u3002\u8a73\u7d30\u7684\u4f7f\u7528\u65b9\u5f0f\u53ef\u4ee5\u53c3\u8003\u5b98\u65b9 \u6559\u5b78 \u3002 \u5c0f\u7d50 CatBoost \u7684\u512a\u9ede\u548c\u4f7f\u7528\u65b9\u6cd5\u90fd\u4ecb\u7d39\u5b8c\u4e86\uff0c\u662f\u4e0d\u662f\u89ba\u5f97\u5341\u5206\u7c21\u55ae\u6613\u7528\u4e14\u529f\u80fd\u5f37\u5927\u3002\u5c24\u5176\u662f\u9047\u5230\u8cc7\u6599\u9700\u8981\u5927\u91cf\u8655\u7406\u548c\u7279\u5fb5\u6578\u503c\u5316\u7684\u4efb\u52d9\u6642\u6700\u9069\u5408\u4f7f\u7528 CatBoost \u4e86\u3002\u771f\u7684\u662f\u6240\u8b02\u7684\u61f6\u4eba\u5957\u4ef6\uff0c\u540d\u7b26\u5176\u5be6\u7684 Ying Train Yi Fa(\u786cTrain\u4e00\u767c)! 
Reference Tutorial: CatBoost Overview SHAP Catboost tutorial CatBoost regression in 6 minutes Catboost\uff1a\u8d85\u8d8aLightgbm\u548cXGBoost\u7684\u53c8\u4e00\u500bboost\u7b97\u6cd5\u795e\u5668 CatBoost\u3001LightGBM\u3001XGBoost\uff0c\u9019\u4e9b\u7b97\u6cd5\u4f60\u90fd\u77ad\u89e3\u55ce\uff1f \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 18] \u6a5f\u5668\u5b78\u7fd2 boosting \u795e\u5668 - CatBoost"},{"location":"18.CatBoost/#day-18-boosting-catboost","text":"","title":"[Day 18] \u6a5f\u5668\u5b78\u7fd2 boosting \u795e\u5668 - CatBoost"},{"location":"18.CatBoost/#_1","text":"\u4e86\u89e3 CatBoost \u6a21\u578b \u5be6\u4f5c CatBoost \u8ff4\u6b78\u6a21\u578b-\u623f\u50f9\u9810\u6e2c \u6a21\u578b\u8a13\u7df4\u3001\u7279\u5fb5\u7be9\u9078 \u8d85\u53c3\u6578\u641c\u7d22 \u81ea\u52d5\u8655\u7406\u985e\u5225\u578b\u7684\u7279\u5fb5 \u53ef\u89e3\u91cb\u5316\u6a21\u578b \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"18.CatBoost/#_2","text":"CatBoost \u540c\u6a23\u662f\u57fa\u65bc Gradient Boosting Tree \u7684\u68af\u5ea6\u63d0\u5347\u6a39\u6a21\u578b\u6846\u67b6\uff0c\u6700\u5927\u7684\u7279\u9ede\u5c0d\u985e\u5225\u7279\u5fb5\u7684\u76f4\u63a5\u652f\u63f4\uff0c\u751a\u81f3\u5141\u8a31\u5b57\u4e32\u985e\u578b\u7684\u7279\u5fb5\u9032\u884c\u6a21\u578b\u8a13\u7df4\u3002\u8fd1\u5e74\u4f86\u5728 Kaggle \u4e0a\u7684\u6bd4\u8cfd\u9678\u7e8c\u6709\u4eba\u4f7f\u7528 CatBoost \u65b9\u6cd5\u4e26\u53d6\u5f97\u4e0d\u932f\u7684\u6210\u7e3e\uff0c\u65bc\u662f\u5c31\u4f86\u64b0\u5beb\u6587\u7ae0\u9806\u4fbf\u4f86\u77a7\u77a7\u5b83\u8207\u5176\u4ed6 Boosting \u6f14\u7b97\u6cd5\u4e0d\u540c\u4e4b\u8655\u3002\u5176\u4e2d\u6700\u7279\u5225\u7684\u5730\u65b9\u662f CatBoost 
\u80fd\u5920\u8655\u7406\u975e\u6578\u503c\u578b\u614b\u7684\u8cc7\u6599\uff0c\u4e5f\u5c31\u662f\u8aac\u7121\u9700\u5c0d\u6578\u64da\u7279\u5fb5\u9032\u884c\u4efb\u4f55\u7684\u9810\u8655\u7406\u5c31\u53ef\u4ee5\u5c07\u985e\u5225\u8f49\u63db\u7232\u6578\u5b57\u3002CatBoost \u63a1\u7528\u6c7a\u7b56\u6a39\u68af\u5ea6\u63d0\u5347\u65b9\u6cd5\u4e26\u5ba3\u7a31\u5728\u6548\u80fd\u4e0a\u6bd4 XGBoost \u548c LightGBM \u66f4\u52a0\u512a\u5316\uff0c\u540c\u6642\u652f\u63f4 CPU \u548c GPU \u904b\u7b97\u3002\u8207\u5176\u4ed6 Boosting \u65b9\u6cd5\u76f8\u6bd4 CatBoost \u662f\u4e00\u7a2e\u76f8\u5c0d\u8f03\u65b0\u7684\u958b\u6e90\u6a5f\u5668\u5b78\u7fd2\u7b97\u6cd5\u3002\u8a72\u6f14\u7b97\u6cd5\u662f\u7531\u4e00\u9593\u4fc4\u7f85\u65af\u7684\u516c\u53f8 Yandex \u65bc 2017 \u5e74\u6240\u63d0\u51fa\uff0c\u540c\u6642\u5728 arxiv \u6709\u4e00\u7bc7 CatBoost: unbiased boosting with categorical features \u7684\u8ad6\u6587\uff0c\u6587\u4e2d\u4f5c\u8005\u8a73\u7d30\u8aaa\u660e\u4e86 CatBoost \u7684\u65b9\u6cd5\u8207\u512a\u9ede\u3002","title":"\u524d\u8a00"},{"location":"18.CatBoost/#catboost","text":"CatBoost \u540d\u7a31\u6e90\u65bc Category \u548c Boost \u5169\u500b\u55ae\u8a5e\uff0c\u627f\u8972 Boosting \u7684\u512a\u9ede\u4e4b\u5916\u8a72\u6f14\u7b97\u6cd5\u5728\u985e\u5225\u578b\u7684\u7279\u5fb5\u4e0a\u505a\u4e86\u4e00\u4e9b\u66f4\u516c\u5e73\u7684\u7279\u5fb5\u5de5\u7a0b\u3002\u8a13\u7df4\u904e\u7a0b\u4e2d\u5141\u8a31\u6c92\u6709\u7de8\u78bc\u7684\u985e\u5225\u7279\u5fb5\uff0c\u900f\u904e\u5206\u985e\u548c\u6578\u5b57\u7279\u5fb5\u7d44\u5408\u7684\u5404\u7a2e\u7d71\u8a08\u91cf\u70ba\u985e\u5225\u578b\u7684\u7279\u5fb5\u505a\u7de8\u78bc\u3002\u4e0d\u904e\u5728\u8a13\u7df4\u524d\u5fc5\u9808\u78ba\u4fdd\u8a72\u7279\u5fb5\u4e2d\u7121\u7f3a\u5931\u503c\u3002\u5176\u8a13\u7df4\u8cc7\u6599\u82e5\u6709\u7f3a\u5931\u503c CatBoost \u9810\u8a2d\u6703\u5c07\u6578\u503c\u578b\u7684\u8cc7\u6599\u88dc\u4e0a\u6700\u5c0f\u503c\uff0c\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5 \u53c3\u8003 
\u3002\u53e6\u5916\u5c0d\u65bc GPU \u7684\u4f7f\u7528\u8005\uff0c\u5b83\u4e5f\u80fd\u8655\u7406\u5b57\u4e32(\u985e\u5225)\u578b\u614b\u7684\u7279\u5fb5\u3002 \u81ea\u52d5\u8655\u7406\u985e\u5225\u578b\u7684\u7279\u5fb5 \u81ea\u52d5\u8655\u7406\u7f3a\u5931\u503c \u53ef\u4ee5\u8655\u7406\u5404\u7a2e\u6578\u64da\u985e\u578b\uff0c\u5982\u97f3\u983b\u3001\u6587\u5b57\u3001\u5716\u50cf \u6e1b\u5c11\u4eba\u5de5\u8abf\u53c3\u7684\u9700\u8981\uff0c\u4e26\u964d\u4f4e\u4e86\u904e\u64ec\u5408\u7684\u6a5f\u6703","title":"CatBoost \u512a\u9ede"},{"location":"18.CatBoost/#catboost_1","text":"CatBoost \u6f14\u7b97\u6cd5\u53ef\u4ee5\u89e3\u6c7a\u5206\u985e (CatBoostClassifier) \u548c\u8ff4\u6b78 (CatBoostRegressor) \u7684\u554f\u984c\u3002\u5b89\u88dd\u7684\u65b9\u5f0f\u4e5f\u975e\u5e38\u7c21\u55ae\uff0c\u4f7f\u7528 pip \u5c31\u80fd\u8f15\u9b06\u5b89\u88dd\u3002 pip install catboost","title":"CatBoost \u5b89\u88dd"},{"location":"18.CatBoost/#catboost-parameters","text":"CatBoost \u57fa\u672c\u4e0a\u53ef\u4ee5\u81ea\u7531\u7684\u8b93\u6f14\u7b97\u6cd5\u53bb\u9078\u64c7\u6700\u4f73\u7684\u6a21\u578b\uff0c\u4e0d\u904e API \u9084\u662f\u63d0\u4f9b\u4e00\u4e9b\u57fa\u672c\u7684\u8d85\u53c3\u6578\u8b93\u4f7f\u7528\u8005\u624b\u52d5\u8abf\u6574\u3002 Parameters: - iterations: \u7e3d\u5171\u8fed\u4ee3\u7684\u6b21\u6578\uff0c\u5373\u6c7a\u7b56\u6a39\u7684\u500b\u6578\u3002\u9810\u8a2d\u503c\u70ba 1000\u3002 - use_best_model: \u8a2d\u5b9a True \u6642\u5fc5\u9808\u7d66\u5b9a\u9a57\u8b49\u96c6\uff0c\u5c07\u6703\u7559\u4e0b\u9a57\u8b49\u96c6\u5206\u4e2d\u6578\u6700\u9ad8\u7684\u6a21\u578b\u3002 - early_stopping_rounds: \u9023\u7e8c\u8a13\u7df4N\u4ee3\uff0c\u82e5\u7d50\u679c\u672a\u6539\u5584\u5247\u63d0\u65e9\u505c\u6b62\u8a13\u7df4\u3002 - od_type: IncToDec/Iter\uff0c\u9810\u8a2d Iter \u9632\u6b62 Overfitting \u8a55\u4f30\u65b9\u5f0f\uff0c\u82e5\u8a2d\u5b9a\u524d\u8005\u9700\u8981\u8a2d\u5b9a\u95a5\u503c\u3002 - eval_metric: \u6a21\u578b\u8a55\u4f30\u65b9\u5f0f\u3002 - loss_function: 
\u8a08\u7b97loss\u65b9\u6cd5\u3002 - verbose: True(1)/False(0)\uff0c\u9810\u8a2d1\u986f\u793a\u8a13\u7df4\u904e\u7a0b\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\u3002 - learning_rate: \u9810\u8a2d automatically\u3002 - depth: \u6a39\u7684\u6df1\u5ea6\uff0c\u9810\u8a2d 6\u3002 - cat_features: \u8f38\u5165\u985e\u5225\u7279\u5fb5\u7684\u7d22\u5f15\uff0c\u5b83\u6703\u81ea\u52d5\u5e6b\u4f60\u8655\u7406\u3002 \u53c3\u8003 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 \u5982\u679c\u9700\u8981\u624b\u52d5\u8655\u7406 Overfitting \u554f\u984c\u53ef\u4ee5\u53c3\u8003\u9019\u4efd\u5b98\u65b9 \u6587\u4ef6","title":"CatBoost Parameters"},{"location":"18.CatBoost/#_3","text":"\u6a21\u578b\u8a13\u7df4\u65b9\u5f0f\u57fa\u672c\u4e0a\u8207 XGBoost \u4e00\u6a23\uff0c\u5982\u679c\u4f60\u719f\u6089 sklearn \u7684\u8a71 CatBoost \u7684\u4f7f\u7528\u65b9\u5f0f\u57fa\u672c\u4e0a\u5927\u540c\u5c0f\u7570\u3002\u53ea\u4e0d\u904e\u5728 CatBoost \u4e2d\u591a\u4e86\u4e00\u4e9b\u65b9\u4fbf\u7684\u65b9\u6cd5\u548c\u53c3\u6578\u53ef\u4ee5\u4f7f\u7528\u3002\u50cf\u662f\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u53ef\u4ee5\u52a0\u4e0a plot=True \uff0c\u4e26\u5728 eval_set \u53c3\u6578\u4e2d\u63d2\u5165\u6e2c\u8a66\u96c6\u53ef\u4ee5\u5373\u6642\u770b\u5230\u8a13\u7df4\u904e\u7a0b\u7684\u8996\u89ba\u5316\u5206\u6790\u3002\u751a\u81f3\u53ef\u4ee5\u4f7f\u7528\u4ea4\u53c9\u9a57\u8b49\uff0c\u5728\u4e0d\u540c\u7684\u5206\u5272\u4e0a\u89c0\u5bdf\u6a21\u578b\u6e96\u78ba\u5ea6\u7684\u5e73\u5747\u548c\u6a19\u6e96\u504f\u5dee\u3002 from catboost import CatBoostRegressor # \u5efa\u7acb\u6a21\u578b model = CatBoostRegressor ( random_state = 42 , loss_function = 'RMSE' , 
eval_metric = 'RMSE' , use_best_model = True ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b model . fit ( X_train , y_train , eval_set = ( X_test , y_test ), verbose = 0 , plot = True )","title":"\u6a21\u578b\u8a13\u7df4"},{"location":"18.CatBoost/#_4","text":"\u8a13\u7df4\u904e\u7a0b\u4e2d\u6703\u81ea\u52d5\u5f9e\u8cc7\u6599\u4e2d\u7be9\u9078\u5c0d\u6a21\u578b\u9810\u6e2c\u6709\u7528\u7684\u7279\u5fb5\uff0c\u4e26\u79fb\u9664\u7121\u5e6b\u52a9\u9810\u6e2c\u7684\u7279\u5fb5\u3002 \u53c3\u8003 from catboost import CatBoostRegressor , Pool , EShapCalcType , EFeaturesSelectionAlgorithm # feature_names = ['F{}'.format(i) for i in range(X_train.shape[1])] train_pool = Pool ( X_train , y_train , feature_names = boston_dataset . feature_names . tolist ()) test_pool = Pool ( X_test , y_test , feature_names = boston_dataset . feature_names . tolist ()) model = CatBoostRegressor ( random_state = 42 , loss_function = 'RMSE' , eval_metric = 'RMSE' , use_best_model = True ) summary = model . select_features ( train_pool , eval_set = test_pool , features_for_select = '0-12' , num_features_to_select = 3 , steps = 2 , algorithm = EFeaturesSelectionAlgorithm . RecursiveByShapValues , shap_calc_type = EShapCalcType . 
Regular , train_final_model = True , logging_level = 'Silent' , plot = False ) summary \u7531\u65bc\u5728\u8a13\u7df4\u5c07 num_features_to_select \u8a2d\u70ba\u4e09\uff0c\u5373\u8868\u793a\u6a21\u578b\u8a13\u7df4\u6642\u6703\u62ff\u53d6\u4e09\u500b\u6700\u91cd\u8981\u7279\u5fb5\u7576\u4f5c\u505a\u4e2d\u6a21\u578b\u9810\u6e2c\u65b9\u5f0f\u3002\u6211\u5011\u63a1\u7528 sklearn \u7684\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6\uff0c\u7d50\u679c\u53ef\u4ee5\u767c\u73fe\u4e09\u500b\u6700\u91cd\u8981\u7279\u5fb5\u70ba ['RM', 'PTRATIO', 'LSTAT']\u3002\u5982\u679c\u4f60\u6709\u505a EDA \u53ef\u4ee5\u767c\u73fe\u9019\u4e09\u500b\u7279\u5fb5\u8207\u623f\u50f9\u7684\u95dc\u806f\u6027\u90fd\u5f88\u9ad8\u3002 {'selected_features': [5, 10, 12], 'eliminated_features_names': ['DIS', 'B', 'ZN', 'CHAS', 'RAD', 'INDUS', 'CRIM', 'AGE', 'TAX', 'NOX'], 'eliminated_features': [7, 11, 1, 3, 8, 2, 0, 6, 9, 4], 'selected_features_names': ['RM', 'PTRATIO', 'LSTAT']}","title":"\u7279\u5fb5\u7be9\u9078"},{"location":"18.CatBoost/#grid-search","text":"\u9664\u6b64\u4e4b\u5916 CatBoost \u63d0\u4f9b\u5c0d\u6a21\u578b\u7684\u6307\u5b9a\u53c3\u6578\u503c\u9032\u884c\u7c21\u55ae\u7684\u7db2\u683c\u641c\u7d22\uff0c\u5982\u679c\u6709\u4f7f\u7528\u904e sklearn \u7684 Grid Search \u5176\u5be6\u4ed6\u5c31\u662f\u4e00\u6a23\u7684\u4f7f\u7528\u65b9\u5f0f\u3002 \u53c3\u8003 from catboost import CatBoostRegressor grid = { 'iterations' : [ 100 , 150 , 200 ], 'learning_rate' : [ 0.03 , 0.1 ], 'depth' : [ 2 , 4 , 6 , 8 ], 'l2_leaf_reg' : [ 0.2 , 0.5 , 1 , 3 ]} model = CatBoostRegressor ( random_state = 42 , loss_function = 'RMSE' , eval_metric = 'RMSE' ) model . 
grid_search ( grid , X_train , y_train )","title":"Grid search"},{"location":"18.CatBoost/#_5","text":"CatBoost \u7121\u9700\u5c0d\u6578\u64da\u7279\u5fb5\u9032\u884c\u4efb\u4f55\u7684\u9810\u8655\u7406\u5c31\u53ef\u4ee5\u5c07\u985e\u5225\u8f49\u63db\u7232\u6578\u5b57\u3002\u4e0b\u9762\u7a0b\u5f0f\u70ba\u4e00\u500b\u5206\u985e\u554f\u984c\u7684\u7bc4\u4f8b\uff0c\u5176\u4e2d\u8f38\u5165\u7279\u5fb5\u7684\u7b2c\u4e00\u500b\u70ba\u5b63\u7bc0\u3002\u5728\u6a5f\u5668\u5b78\u7fd2\u4e0a\u7684\u8a8d\u77e5\u6211\u5011\u5fc5\u9808\u5c07\u6240\u4ee5\u5b57\u4e32\u578b\u8cc7\u6599\u5fc5\u9808\u900f\u904e\u6a19\u7c64\u7de8\u78bc\u65b9\u5f0f\u8f49\u63db\u6210\u6578\u503c\uff0c\u7136\u800c\u5728 CatBoost \u5b8c\u5168\u4e0d\u9700\u8981\u3002\u50c5\u9700\u5728\u8a13\u7df4\u6a21\u578b\u6642\u7d66\u4e88\u53c3\u6578 cat_features = [0] \u5373\u4ee3\u8868\u8cc7\u6599\u7684\u7b2c\u4e00\u500b\u7279\u5fb5\u9700\u8981\u9032\u884c\u985e\u5225\u8f49\u63db\u3002\u53e6\u5916\u8f38\u51fa\u8449\u4e0d\u4e00\u5b9a\u8981\u7de8\u78bc\u5f8c\u7684\u7d50\u679c\uff0c\u4f60\u4e5f\u53ef\u4ee5\u4e1f\u5165\u6587\u5b57\u9032\u884c\u8a13\u7df4\u53ea\u8981\u52a0\u4e0a loss_function='MultiClass' \u5373\u53ef\u3002 from catboost import Pool , CatBoostClassifier train_data = [[ \"summer\" , 1924 , 44 ], [ \"summer\" , 1932 , 37 ], [ \"winter\" , 1980 , 37 ], [ \"summer\" , 2012 , 204 ]] eval_data = [[ \"winter\" , 1996 , 197 ], [ \"winter\" , 1968 , 37 ], [ \"summer\" , 2002 , 77 ], [ \"summer\" , 1948 , 59 ]] train_label = [ \"France\" , \"USA\" , \"USA\" , \"UK\" ] eval_label = [ \"USA\" , \"France\" , \"USA\" , \"UK\" ] # Initialize CatBoostClassifier model = CatBoostClassifier ( iterations = 10 , learning_rate = 1 , depth = 2 , cat_features = [ 0 ], loss_function = 'MultiClass' ) # Fit model model . fit ( train_data , train_label ) # Get predicted classes preds_class = model . predict ( eval_data ) # Get predicted probabilities for each class preds_proba = model . 
predict_proba ( eval_data ) # Get predicted RawFormulaVal preds_raw = model . predict ( eval_data , prediction_type = 'RawFormulaVal' ) \u53c3\u8003","title":"\u81ea\u52d5\u8655\u7406\u985e\u5225\u578b\u7684\u7279\u5fb5"},{"location":"18.CatBoost/#verbose","text":"\u8a13\u7df4\u904e\u7a0b\u4e2d\u53ef\u4ee5\u96a8\u6642\u89c0\u5bdf\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684loss\uff0c\u4f7f\u7528verbose=10\u5373\u4ee3\u8868\u6bcf10\u6b21\u8fed\u4ee3\u6703\u986f\u793a\u4e00\u6b21\u8cc7\u8a0a\uff0c\u9019\u7a2e\u65b9\u5f0f\u4e5f\u89e3\u6c7a\u6bcf\u6b21\u758a\u4ee3\u986f\u793a\u4e00\u6b21\u7684\u56f0\u64fe\u3002\u8a13\u7df4\u904e\u7a0b\u4e2d\u5269\u9918\u6642\u9593\u4e5f\u6703\u986f\u793a\u51fa\u4f86\u3002","title":"\u5584\u7528 Verbose"},{"location":"18.CatBoost/#_6","text":"CatBoost \u63d0\u4f9b\u4e86 plot \u53ef\u4ee5\u65b9\u4fbf\u5728\u8a13\u7df4\u6642\u67e5\u770b\u4e26\u4f5c\u5373\u6642\u5206\u6790\u8a13\u7df4\u8da8\u52e2\u3002\u9664\u6b64\u4e4b\u5916 CatBoost \u4e5f\u652f\u63f4 SHAP \u589e\u52a0\u4e86\u6a21\u578b\u53ef\u89e3\u91cb\u3002\u8a73\u7d30\u7684\u4f7f\u7528\u65b9\u5f0f\u53ef\u4ee5\u53c3\u8003\u5b98\u65b9 \u6559\u5b78 \u3002","title":"\u6a21\u578b\u7684\u89e3\u91cb"},{"location":"18.CatBoost/#_7","text":"CatBoost \u7684\u512a\u9ede\u548c\u4f7f\u7528\u65b9\u6cd5\u90fd\u4ecb\u7d39\u5b8c\u4e86\uff0c\u662f\u4e0d\u662f\u89ba\u5f97\u5341\u5206\u7c21\u55ae\u6613\u7528\u4e14\u529f\u80fd\u5f37\u5927\u3002\u5c24\u5176\u662f\u9047\u5230\u8cc7\u6599\u9700\u8981\u5927\u91cf\u8655\u7406\u548c\u7279\u5fb5\u6578\u503c\u5316\u7684\u4efb\u52d9\u6642\u6700\u9069\u5408\u4f7f\u7528 CatBoost \u4e86\u3002\u771f\u7684\u662f\u6240\u8b02\u7684\u61f6\u4eba\u5957\u4ef6\uff0c\u540d\u7b26\u5176\u5be6\u7684 Ying Train Yi Fa(\u786cTrain\u4e00\u767c)!","title":"\u5c0f\u7d50"},{"location":"18.CatBoost/#reference","text":"Tutorial: CatBoost Overview SHAP Catboost tutorial CatBoost regression in 6 minutes 
Catboost\uff1a\u8d85\u8d8aLightgbm\u548cXGBoost\u7684\u53c8\u4e00\u500bboost\u7b97\u6cd5\u795e\u5668 CatBoost\u3001LightGBM\u3001XGBoost\uff0c\u9019\u4e9b\u7b97\u6cd5\u4f60\u90fd\u77ad\u89e3\u55ce\uff1f \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"19.AutoML/","text":"[Day 19] \u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2 - AutoML \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u4e86\u89e3\u4f55\u8b02 AutoML \u8d85\u53c3\u6578\u8abf\u53c3\u65b9\u6cd5 Grid Search Random Search Bayesian Optimization AutoML \u7684\u52d5\u6a5f \u5927\u5bb6\u9084\u8a18\u5f97\u5728 [Day 5] \u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16 \u4e2d\u6709\u63d0\u5230\u5b8c\u6574\u7684\u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b\u5927\u81f4\u5206\u6210\u516b\u500b\u6b65\u9a5f\u3002\u7136\u800c\u6a21\u578b\u7684\u8a13\u7df4\u8207\u8d85\u53c3\u6578\u8abf\u6574\u50c5\u626e\u6f14\u5176\u4e2d\u7684\u4e00\u74b0\uff0c\u9078\u64c7\u4e00\u500b\u597d\u7684\u6a21\u578b\u662f\u4ef6\u91cd\u8981\u7684\u4e8b\u60c5\u3002\u60f3\u5fc5\u5927\u5bb6\u5728\u8a13\u7df4\u6a21\u578b\u6642\u4e00\u5b9a\u6703\u9047\u5230\u4e00\u500b\u68d8\u624b\u7684\u554f\u984c\uff0c\u5c31\u662f\u8a72\u5982\u4f55\u6b63\u78ba\u9078\u64c7\u6a21\u578b\u4ee5\u53ca\u8abf\u6574\u8d85\u53c3\u6578\uff1f\u96a8\u8457\u8d8a\u4f86\u8d8a\u591a\u7684\u6f14\u7b97\u6cd5\u4e0d\u65b7\u5730\u88ab\u958b\u767c\u51fa\u4f86\uff0c\u8981\u5f9e\u832b\u832b\u5927\u6d77\u4e2d\u6311\u9078\u4e00\u500b\u5408\u9069\u7684\u6a21\u578b\u662f\u4ef6\u8017\u6642\u7684\u4e8b\u3002\u56e0\u6b64\u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2 (Automated Machine Learning ,AutoML) \u53ef\u4ee5\u5e6b\u52a9\u6211\u5011\u5728\u6709\u9650\u7684\u6642\u9593\u5167\u627e\u51fa\u4e00\u500b\u6eff\u610f\u7684\u6a21\u578b\u3002\u5728\u8fd1\u5e74\u4f86\u6709\u8a31\u591a\u4eba\u958b\u59cb\u7814\u7a76\u9019\u985e\u7684\u554f\u984c\uff0c\u7b46\u8005\u5f59\u6574\u4e86\u5e7e\u500b Python \u71b1\u9580\u7684 
AutoML \u958b\u6e90\u5957\u4ef6\uff1a AutoGluon Auto-sklearn FLAML H2O AutoML LightAutoML Pycaret MLJAR TPOT MLBox Auto-PyTorch AutoKeras talos AutoML \u626e\u6f14\u7684\u89d2\u8272 \u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2\u63d0\u4f9b\u4e86\u4e00\u7cfb\u5217\u7684\u65b9\u6cd5\u548c\u81ea\u52d5\u5316\u7684\u5b78\u7fd2\u6d41\u7a0b\uff0c\u4ee5\u63d0\u9ad8\u6a5f\u5668\u5b78\u7fd2\u7684\u6548\u7387\u4e26\u52a0\u901f\u6a5f\u5668\u5b78\u7fd2\u7684\u7814\u7a76\u3002\u900f\u904e AutoML \u96c6\u7d50\u5c08\u5bb6\u7684\u5148\u9a57\u77e5\u8b58\uff0c\u5927\u5e45\u964d\u4f4e\u4e86\u6a5f\u5668\u5b78\u7fd2\u5efa\u6a21\u7684\u56f0\u96e3\u5ea6\u3002\u96d6\u7136\u9818\u57df\u5c08\u5bb6\u8207 AI \u5de5\u7a0b\u5e2b\u5fc5\u7136\u626e\u6f14\u91cd\u8981\u7684\u89d2\u8272\uff0c\u4f46\u662f\u8fd1\u5e74\u4f86 No Code \u7121\u7a0b\u5f0f\u78bc\u958b\u767c\u5e73\u53f0\u5f62\u6210\u4e00\u80a1\u6f6e\u6d41\u3002AI \u518d\u4e5f\u4e0d\u662f\u9700\u8981\u8cc7\u8a0a\u80cc\u666f\u7684\u4eba\u624d\u80fd\u505a\u7684\u4e8b\uff0c\u76ee\u7684\u662f\u8b93\u5927\u5bb6\u4e0d\u7528\u900f\u904e\u5beb\u7a0b\u5f0f\u4e5f\u80fd\u5feb\u901f\u5730\u9032\u884c\u8cc7\u6599\u63a2\u7d22\u8207\u5efa\u7acb\u9810\u6e2c\u6a21\u578b\u3002\u7136\u800c\u8fd1\u5e74\u4f86\u8a31\u591a\u4f01\u696d\u958b\u767c\u4e86\u5404\u7a2e\u9700\u6c42\u7684 AutoML \u5e73\u53f0\uff0c\u5982\u96e8\u5f8c\u6625\u7b4d\u822c\u7684\u51fa\u73fe\uff1a Google: Cloud AutoML Microsoft: Azure Machine Learning Amazon: SageMaker Autopilot Landing AI: LandingLens Chimes AI: tukey AutoML \u80fd\u5e6b\u52a9\u591a\u5c11\u4e8b\u60c5 
\u5178\u578b\u7684\u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b\u662f\u4e00\u500b\u8fed\u4ee3\u7684\u5faa\u74b0\u9031\u671f\uff0c\u5f9e\u5b9a\u7fa9\u554f\u984c\u3001\u8cc7\u6599\u6536\u96c6\u8207\u8655\u7406\u3001\u6a21\u578b\u8a2d\u8a08\u5230\u6700\u7d42\u6a21\u578b\u90e8\u7f72\uff0c\u6bcf\u500b\u6b65\u9a5f\u6975\u70ba\u91cd\u8981\u4e14\u7f3a\u4e00\u4e0d\u53ef\u3002\u6b64\u5916\u4e00\u500b\u597d\u7684\u6a5f\u5668\u5b78\u7fd2\u7684\u5c08\u6848\u9700\u8981\u57f7\u884c MLOps \u7684\u6d41\u7a0b\uff0c\u624d\u80fd\u5920\u8b93\u6a21\u578b\u5728\u5be6\u969b\u61c9\u7528\u5834\u666f\u8d8a\u4f86\u8d8a\u597d\u771f\u5be6\u5730\u89e3\u6c7a\u554f\u984c\u3002MLOps \u6307\u7684\u662f\u5f9e AI \u6a21\u578b\u8a13\u7df4\u5230\u90e8\u7f72\u4e0a\u7dda\u7684\u4e00\u5957\u5b8c\u6574\u6a5f\u5668\u5b78\u7fd2\u5de5\u4f5c\u6d41\u7a0b\uff0c\u8fd1\u5e74\u4f86\u9019\u4e00\u540d\u8a5e\u975e\u5e38\u71b1\u9580\uff0c\u5b83\u5176\u5be6\u5c31\u662f ML (\u6a5f\u5668\u5b78\u7fd2) \u8207 DevOps (\u958b\u767c\u8207\u7dad\u904b) \u7684\u7d50\u5408\u3002\u5982\u4e0b\u5716\u6240\u793a\u5f9e\u8a13\u7df4\u6a21\u578b\u5230\u6b63\u5f0f\u90e8\u7f72\u4e2d\u9593\u9084\u6709\u8a31\u591a\u4e8b\u60c5\u8981\u8655\u7406\uff0c\u800c\u6a21\u578b\u4e0a\u7dda\u5f8c\u9084\u662f\u8981\u6301\u7e8c\u76e3\u63a7\u4e26\u6536\u96c6\u65b0\u7684\u5834\u57df\u8cc7\u6599\u3002\u6700\u5f8c\u5c07\u8cc7\u6599\u6536\u96c6\u5230\u4e00\u5b9a\u7a0b\u5ea6\uff0c\u53c8\u56de\u5230\u9031\u671f\u7684\u7b2c\u4e00\u6b65\u91cd\u65b0\u8a13\u7df4\u65b0\u6a21\u578b\u3002\u81f3\u65bc\u6a21\u578b\u8a72\u5982\u4f55\u91cd\u65b0\u8a13\u7df4\u4e26\u4fdd\u6301\u8cc7\u6599\u7684\u96b1\u79c1\u6027\u5c31\u662f\u53e6\u4e00\u9580\u8b70\u984c\u3002\u9019\u6642\u5019\u6211\u5011\u5c31\u80fd\u63a1\u7528\u4e00\u500b\u6280\u8853\u53eb\u505a Federated Learning (\u806f\u5408\u5b78\u7fd2) \u60f3\u8fa6\u6cd5\u8655\u7406\u9019\u985e\u7684\u4e8b\u60c5\u3002 \u4f46\u662f\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u8a13\u7df4\u4e00\u500b\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\uff0c\u5728 MLOps 
\u7684\u9031\u671f\u4e2d\u50c5\u626e\u6f14\u5c0f\u5c0f\u7684\u4e00\u584a\u89d2\u8272\u3002\u4e0b\u5716\u662f\u4e00\u500b\u8a13\u7df4\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u7684\u57fa\u672c\u6d41\u7a0b\uff0c\u4e2d\u9593\u6a58\u8272\u7684\u90e8\u5206\u5c31\u662f AutoML \u53ef\u4ee5\u5e6b\u52a9\u6211\u5011\u7684\u4e8b\u3002\u5f9e\u8cc7\u6599\u524d\u8655\u7406\u3001\u8a13\u7df4\u6a21\u578b\u5230\u8a55\u4f30\u6a21\u578b\u9700\u8981\u4e0d\u65b7\u5730\u7684\u9032\u884c\u8a66\u9a57\uff0c\u4e26\u4e14\u5617\u8a66\u5404\u7a2e\u4e0d\u540c\u7684\u6a21\u578b\u6f14\u7b97\u6cd5\u8207\u6a21\u578b\u8d85\u53c3\u6578\u3002\u9664\u6b64\u4e4b\u5916\u9084\u6709\u8cc7\u6599\u524d\u8655\u7406\u8207\u7279\u5fb5\u5de5\u7a0b\uff0c\u90fd\u53ef\u4ee5\u900f\u904e AutoML \u81ea\u52d5\u5316\u7684\u8a13\u7df4\u627e\u5230\u4e00\u500b\u6eff\u610f\u7684\u6a21\u578b\u3002 \u8d85\u53c3\u6578\u8abf\u53c3\u65b9\u6cd5 \u6a5f\u5668\u5b78\u7fd2\u81ea\u52d5\u5316\u7684\u56f0\u96e3\u9ede\u5728\u65bc\u8cc7\u6599\u6e05\u6d17\u8207\u7279\u5fb5\u5de5\u7a0b\u6280\u5de7\u3002\u4e00\u500b\u597d\u7684\u7279\u5fb5\u8868\u9054\u53ef\u4ee5\u8b93\u6a21\u578b\u5feb\u901f\u5730\u6293\u5230\u95dc\u9375\u56e0\u5b50\uff0c\u4e26\u8b93\u6a21\u578b\u9810\u6e2c\u80fd\u529b\u63d0\u5347\u3002\u6176\u5e78\u7684\u662f\u6a21\u578b\u6311\u9078\u548c\u8d85\u53c3\u6578\u8abf\u6574\u5df2\u7d93\u6709\u6bd4\u8f03\u6210\u719f\u7684\u65b9\u6cd5\u53ef\u4ee5\u5354\u52a9\u6211\u5011\u6709\u6548\u7684\u641c\u5c0b\u3002 Grid Search \u7db2\u683c\u641c\u7d22/\u7aae\u8209\u641c\u7d22 Random Search \u96a8\u6a5f\u641c\u7d22 Bayesian Optimization \u8c9d\u8449\u65af\u512a\u5316 Grid Search Grid Search (\u7db2\u683c\u641c\u7d22) 
\u53c8\u7a31\u7aae\u8209\u641c\u7d22\u3002\u5b83\u7684\u641c\u7d22\u65b9\u5f0f\u662f\u5728\u6240\u6709\u53ef\u80fd\u7684\u53c3\u6578\u4e2d\uff0c\u900f\u904e\u6392\u5217\u7d44\u5408\u5617\u8a66\u6bcf\u4e00\u7a2e\u53ef\u80fd\u6027\u3002\u4e26\u5c07\u8868\u73fe\u6700\u597d\u7684\u53c3\u6578\u4f5c\u70ba\u6700\u7d42\u7684\u8d85\u53c3\u6578\u641c\u5c0b\u7d50\u679c\u3002\u4ed6\u7684\u7f3a\u9ede\u5c31\u662f\u7576\u6709\u8a31\u591a\u8d85\u53c3\u6578\u8981\u5c0b\u627e\u6642\uff0c\u4ed6\u7684\u6392\u5217\u7d44\u5408\u5c31\u6703\u8b8a\u5f97\u975e\u5e38\u591a\uff0c\u5c0e\u81f4\u641c\u7d22\u7684\u6642\u9593\u8b8a\u9577\u82b1\u8cbb\u7684\u8cc7\u6e90\u4e5f\u8b8a\u5927\u3002\u56e0\u6b64\u9019\u7a2e\u66b4\u529b\u5f0f\u7684\u641c\u7d22\u65b9\u6cd5\u9069\u5408\u5728\u5c0f\u7684\u8cc7\u6599\u96c6\u4e0a\u88ab\u63a1\u7528\u3002\u7136\u800c\u5728 Sklearn \u5957\u4ef6\u4e2d\u6709\u63d0\u4f9b GridSearchCV \u65b9\u6cd5\uff0c\u4f7f\u7528\u8005\u53ef\u4ee5\u81ea\u5df1\u8a2d\u5b9a\u53c3\u6578\u5217\u8868\uff0c\u4e26\u900f\u904e\u6240\u6709\u53ef\u80fd\u7684\u53c3\u6578\u7d44\u5408\u4e00\u500b\u4e00\u500b\u5617\u8a66\u627e\u5230\u6700\u5408\u9069\u7684\u53c3\u6578\u3002 from sklearn import svm , datasets from sklearn.model_selection import GridSearchCV # \u8f09\u5165\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6 iris = datasets . load_iris () # \u8a2d\u5b9a\u60f3\u8981\u7684\u641c\u7d22\u53c3\u6578\u4e26\u7d66\u4e88\u5019\u9078\u503c parameters = { 'kernel' :( 'linear' , 'rbf' ), 'C' :[ 1 , 10 ]} # \u5efa\u7acb SVC \u5206\u985e\u5668 svc = svm . SVC () # \u7db2\u683c\u641c\u7d22\u6240\u6709\u53ef\u80fd\u7684\u7d44\u5408(2*2)\u5171\u56db\u7a2e clf = GridSearchCV ( svc , parameters ) # \u64ec\u5408\u6578\u64da\u4e26\u56de\u50b3\u6700\u4f73\u6a21\u578b clf . fit ( iris . data , iris . target ) \u641c\u7d22\u7d50\u675f\u5f8c\u4e5f\u80fd\u5920\u900f\u904e cv_results_ \u67e5\u770b\u6240\u6709\u7d44\u5408\u7684\u8d85\u53c3\u6578\u6240\u5c0d\u61c9\u7684\u8a13\u7df4\u7d50\u679c\u3002 clf . 
cv_results_ Random Search Random Search (\u96a8\u6a5f\u641c\u7d22) \u6309\u7167\u5b57\u9762\u4e0a\u7684\u610f\u601d\u5c31\u662f\u5728\u6240\u6709\u53ef\u80fd\u7684\u5019\u9078\u53c3\u6578\u4e2d\u96a8\u6a5f\u6311\u9078\u4e00\u500b\u6578\u503c\u4e26\u5617\u8a66\u3002\u5982\u679c\u9700\u8981\u8abf\u7684\u53c3\u6578\u8f03\u591a\u7684\u6642\u5019\uff0c\u4f7f\u7528\u96a8\u6a5f\u641c\u7d22\u53ef\u4ee5\u964d\u4f4e\u641c\u7d22\u6642\u9593\uff0c\u540c\u6642\u53c8\u80fd\u78ba\u4fdd\u4e00\u5b9a\u7684\u6a21\u578b\u6e96\u78ba\u6027\u3002\u5728 Sklearn \u5957\u4ef6\u4e2d\u4e5f\u6709\u63d0\u4f9b RandomizedSearchCV \u65b9\u6cd5\u53ef\u4ee5\u547c\u53eb\uff0c\u8207\u7db2\u683c\u641c\u7d22\u7684\u5dee\u5225\u5728\u65bc\u4f7f\u7528\u8005\u53ef\u4ee5\u5c07\u6b32\u641c\u5c0b\u7684\u8d85\u53c3\u6578\u8a2d\u5b9a\u4e00\u500b\u671f\u671b\u7684\u7bc4\u570d\u3002\u8a72\u65b9\u6cd5\u6703\u5728\u6b64\u7bc4\u570d\u4e2d\u96a8\u6a5f\u62bd\u4e00\u500b\u6578\u503c\u4e26\u9032\u884c\u6a21\u578b\u8a13\u7df4\u4e26\u9a57\u8b49\u6a21\u578b\u3002\u4e26\u627e\u51fa\u6240\u6709\u96a8\u6a5f\u7d44\u5408\u4e2d\u8868\u73fe\u6700\u597d\u7684\u4e00\u7d44\u8d85\u53c3\u6578\u3002 from sklearn.datasets import load_iris from sklearn.linear_model import LogisticRegression from sklearn.model_selection import RandomizedSearchCV from scipy.stats import uniform # \u8f09\u5165\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6 iris = load_iris () # \u5efa\u7acb\u908f\u8f2f\u8ff4\u6b78\u6a21\u578b logistic = LogisticRegression ( solver = 'saga' , tol = 1e-2 , max_iter = 200 , random_state = 0 ) # \u8a2d\u5b9a\u6b32\u641c\u5c0b\u7684\u8d85\u53c3\u6578\u4e26\u7d66\u4e88\u4e00\u500b\u671f\u671b\u7684\u7bc4\u570d distributions = dict ( C = uniform ( loc = 0 , scale = 4 ), penalty = [ 'l2' , 'l1' ]) # \u96a8\u6a5f\u641c\u7d22\u9810\u8a2d n_iter=10 clf = RandomizedSearchCV ( logistic , distributions , random_state = 0 , n_iter = 10 ) # \u64ec\u5408\u6578\u64da\u4e26\u56de\u50b3\u6700\u4f73\u6a21\u578b search = clf . fit ( iris . 
data , iris . target ) search . best_params_ Bayesian Optimization Bayesian Optimization (\u8c9d\u8449\u65af\u512a\u5316) \u76ee\u6a19\u662f\u8981\u5728\u6700\u5c11\u7684\u8a66\u9a57\u4e0b\u5c0b\u627e\u4e00\u7d44\u6700\u4f73\u7684\u8d85\u53c3\u6578\u4f7f\u5f97\u932f\u8aa4\u7387\u80fd\u5920\u8d8a\u4f4e\u8d8a\u597d\u3002\u7531\u65bc\u6211\u5011\u6240\u6536\u96c6\u5230\u7684\u8cc7\u6599\u7121\u5f9e\u5f97\u77e5\u8a72\u6a21\u578b\u7684\u76ee\u6a19\u51fd\u6578\u662f\u9577\u600e\u6a23\uff0c\u56e0\u6b64\u6a5f\u5668\u5b78\u7fd2\u7684\u76ee\u7684\u5c31\u662f\u8981\u5f9e\u9019\u4e9b\u8cc7\u6599\u4e2d\u53bb\u64ec\u5408\u4e00\u500b\u51fd\u6578\uff0c\u76ee\u6a19\u662f\u7d66\u4e88\u4e00\u7b46\u8f38\u5165 X \u8a72\u51fd\u6578\u7684\u8f38\u51fa\u8981\u8207\u771f\u5be6\u7684\u7b54\u6848\u8d8a\u63a5\u8fd1\u8d8a\u597d\u3002\u900f\u904e\u4ee3\u7406\u512a\u5316 (surrogate optimization) \u4f7f\u7528\u4e00\u500b\u4ee3\u7406\u51fd\u6578\u4f86\u4f30\u8a08\u76ee\u6a19\u51fd\u6578\u3002\u7c21\u55ae\u4f86\u8aaa\u4ee3\u7406\u51fd\u6578\u662f\u6307\u76ee\u6a19\u51fd\u6578\u7684\u4e00\u7a2e\u8fd1\u4f3c\uff0c\u6b64\u5916\u4ee3\u7406\u51fd\u6578\u53ef\u57fa\u65bc\u53d6\u6a23\u5f97\u5230\u7684\u8cc7\u6599\u9ede\u88ab\u69cb\u5efa\u51fa\u4f86\u3002 \u4ee3\u7406\u51fd\u6578\u7684\u76ee\u7684\u662f\u5728\u7d66\u5b9a\u4e00\u7d44\u7279\u5b9a\u7684\u5019\u9078\u8d85\u53c3\u6578\u7684\u60c5\u6cc1\u4e0b\u5feb\u901f\u4f30\u8a08\u5be6\u969b\u6a21\u578b\u7684\u932f\u8aa4\u7387\u3002\u900f\u904e\u9019\u7a2e\u65b9\u5f0f\u53ef\u4ee5\u5feb\u901f\u6c7a\u5b9a\u8a72\u7d44\u8d85\u53c3\u6578\u662f\u5426\u53ef\u4ee5\u88ab\u62ff\u4f86\u5be6\u969b\u8a13\u7df4\u6a21\u578b\u3002\u96a8\u8457\u8a66\u9a57\u6b21\u6578\u7684\u589e\u52a0\uff0c\u4ee3\u7406\u51fd\u6578\u96a8\u8457\u5148\u524d\u7684\u8a66\u9a57\u7d50\u679c\u800c\u66f4\u65b0\u6539\u9032\uff0c\u4e26\u958b\u59cb\u63a8\u85a6\u66f4\u597d\u7684\u5019\u9078\u8d85\u53c3\u6578\u3002 Auto-sklearn 
\u5c31\u662f\u4e00\u500b\u900f\u904e\u8c9d\u8449\u65af\u512a\u5316\u4f86\u5c0b\u627e\u6700\u4f73\u8d85\u53c3\u6578\u7684\u4e00\u500b\u5de5\u5177\u3002\u540c\u6642\u5b83\u4e5f\u80fd\u641c\u7d22\u5728 Sklearn \u4e2d\u6240\u6709\u53ef\u80fd\u7684\u7b97\u6cd5\uff0c\u4e26\u70ba\u4f60\u63a8\u85a6\u4e00\u500b\u5408\u9069\u7684\u6a21\u578b\u8207\u8cc7\u6599\u524d\u8655\u7406\u65b9\u5f0f\u3002\u660e\u5929\u6211\u5011\u5c31\u4f86\u4e00\u63a2\u7a76\u7adf\u8a72\u5957\u4ef6\u80cc\u5f8c\u7684\u795e\u79d8\u539f\u7406\u4ee5\u53ca\u7a0b\u5f0f\u5be6\u4f5c\u5427\uff01 Reference automl.org Sklearn \u5b98\u65b9\u6587\u4ef6 GridSearchCV Sklearn \u5b98\u65b9\u6587\u4ef6 RandomizedSearchCV \u7528\u7c21\u55ae\u8853\u8a9e\u8b93\u4f60\u770b\u5230\u8c9d\u8449\u65af\u512a\u5316\u4e4b\u7f8e \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 19] \u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2 - AutoML"},{"location":"19.AutoML/#day-19-automl","text":"","title":"[Day 19] \u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2 - AutoML"},{"location":"19.AutoML/#_1","text":"\u4e86\u89e3\u4f55\u8b02 AutoML \u8d85\u53c3\u6578\u8abf\u53c3\u65b9\u6cd5 Grid Search Random Search Bayesian Optimization","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"19.AutoML/#automl","text":"\u5927\u5bb6\u9084\u8a18\u5f97\u5728 [Day 5] \u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16 
\u4e2d\u6709\u63d0\u5230\u5b8c\u6574\u7684\u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b\u5927\u81f4\u5206\u6210\u516b\u500b\u6b65\u9a5f\u3002\u7136\u800c\u6a21\u578b\u7684\u8a13\u7df4\u8207\u8d85\u53c3\u6578\u8abf\u6574\u50c5\u626e\u6f14\u5176\u4e2d\u7684\u4e00\u74b0\uff0c\u9078\u64c7\u4e00\u500b\u597d\u7684\u6a21\u578b\u662f\u4ef6\u91cd\u8981\u7684\u4e8b\u60c5\u3002\u60f3\u5fc5\u5927\u5bb6\u5728\u8a13\u7df4\u6a21\u578b\u6642\u4e00\u5b9a\u6703\u9047\u5230\u4e00\u500b\u68d8\u624b\u7684\u554f\u984c\uff0c\u5c31\u662f\u8a72\u5982\u4f55\u6b63\u78ba\u9078\u64c7\u6a21\u578b\u4ee5\u53ca\u8abf\u6574\u8d85\u53c3\u6578\uff1f\u96a8\u8457\u8d8a\u4f86\u8d8a\u591a\u7684\u6f14\u7b97\u6cd5\u4e0d\u65b7\u5730\u88ab\u958b\u767c\u51fa\u4f86\uff0c\u8981\u5f9e\u832b\u832b\u5927\u6d77\u4e2d\u6311\u9078\u4e00\u500b\u5408\u9069\u7684\u6a21\u578b\u662f\u4ef6\u8017\u6642\u7684\u4e8b\u3002\u56e0\u6b64\u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2 (Automated Machine Learning ,AutoML) \u53ef\u4ee5\u5e6b\u52a9\u6211\u5011\u5728\u6709\u9650\u7684\u6642\u9593\u5167\u627e\u51fa\u4e00\u500b\u6eff\u610f\u7684\u6a21\u578b\u3002\u5728\u8fd1\u5e74\u4f86\u6709\u8a31\u591a\u4eba\u958b\u59cb\u7814\u7a76\u9019\u985e\u7684\u554f\u984c\uff0c\u7b46\u8005\u5f59\u6574\u4e86\u5e7e\u500b Python \u71b1\u9580\u7684 AutoML \u958b\u6e90\u5957\u4ef6\uff1a AutoGluon Auto-sklearn FLAML H2O AutoML LightAutoML Pycaret MLJAR TPOT MLBox Auto-PyTorch AutoKeras talos","title":"AutoML \u7684\u52d5\u6a5f"},{"location":"19.AutoML/#automl_1","text":"\u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2\u63d0\u4f9b\u4e86\u4e00\u7cfb\u5217\u7684\u65b9\u6cd5\u548c\u81ea\u52d5\u5316\u7684\u5b78\u7fd2\u6d41\u7a0b\uff0c\u4ee5\u63d0\u9ad8\u6a5f\u5668\u5b78\u7fd2\u7684\u6548\u7387\u4e26\u52a0\u901f\u6a5f\u5668\u5b78\u7fd2\u7684\u7814\u7a76\u3002\u900f\u904e AutoML \u96c6\u7d50\u5c08\u5bb6\u7684\u5148\u9a57\u77e5\u8b58\uff0c\u5927\u5e45\u964d\u4f4e\u4e86\u6a5f\u5668\u5b78\u7fd2\u5efa\u6a21\u7684\u56f0\u96e3\u5ea6\u3002\u96d6\u7136\u9818\u57df\u5c08\u5bb6\u8207 AI 
\u5de5\u7a0b\u5e2b\u5fc5\u7136\u626e\u6f14\u91cd\u8981\u7684\u89d2\u8272\uff0c\u4f46\u662f\u8fd1\u5e74\u4f86 No Code \u7121\u7a0b\u5f0f\u78bc\u958b\u767c\u5e73\u53f0\u5f62\u6210\u4e00\u80a1\u6f6e\u6d41\u3002AI \u518d\u4e5f\u4e0d\u662f\u9700\u8981\u8cc7\u8a0a\u80cc\u666f\u7684\u4eba\u624d\u80fd\u505a\u7684\u4e8b\uff0c\u76ee\u7684\u662f\u8b93\u5927\u5bb6\u4e0d\u7528\u900f\u904e\u5beb\u7a0b\u5f0f\u4e5f\u80fd\u5feb\u901f\u5730\u9032\u884c\u8cc7\u6599\u63a2\u7d22\u8207\u5efa\u7acb\u9810\u6e2c\u6a21\u578b\u3002\u7136\u800c\u8fd1\u5e74\u4f86\u8a31\u591a\u4f01\u696d\u958b\u767c\u4e86\u5404\u7a2e\u9700\u6c42\u7684 AutoML \u5e73\u53f0\uff0c\u5982\u96e8\u5f8c\u6625\u7b4d\u822c\u7684\u51fa\u73fe\uff1a Google: Cloud AutoML Microsoft: Azure Machine Learning Amazon: SageMaker Autopilot Landing AI: LandingLens Chimes AI: tukey","title":"AutoML \u626e\u6f14\u7684\u89d2\u8272"},{"location":"19.AutoML/#automl_2","text":"\u5178\u578b\u7684\u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b\u662f\u4e00\u500b\u8fed\u4ee3\u7684\u5faa\u74b0\u9031\u671f\uff0c\u5f9e\u5b9a\u7fa9\u554f\u984c\u3001\u8cc7\u6599\u6536\u96c6\u8207\u8655\u7406\u3001\u6a21\u578b\u8a2d\u8a08\u5230\u6700\u7d42\u6a21\u578b\u90e8\u7f72\uff0c\u6bcf\u500b\u6b65\u9a5f\u6975\u70ba\u91cd\u8981\u4e14\u7f3a\u4e00\u4e0d\u53ef\u3002\u6b64\u5916\u4e00\u500b\u597d\u7684\u6a5f\u5668\u5b78\u7fd2\u7684\u5c08\u6848\u9700\u8981\u57f7\u884c MLOps \u7684\u6d41\u7a0b\uff0c\u624d\u80fd\u5920\u8b93\u6a21\u578b\u5728\u5be6\u969b\u61c9\u7528\u5834\u666f\u8d8a\u4f86\u8d8a\u597d\u771f\u5be6\u5730\u89e3\u6c7a\u554f\u984c\u3002MLOps \u6307\u7684\u662f\u5f9e AI \u6a21\u578b\u8a13\u7df4\u5230\u90e8\u7f72\u4e0a\u7dda\u7684\u4e00\u5957\u5b8c\u6574\u6a5f\u5668\u5b78\u7fd2\u5de5\u4f5c\u6d41\u7a0b\uff0c\u8fd1\u5e74\u4f86\u9019\u4e00\u540d\u8a5e\u975e\u5e38\u71b1\u9580\uff0c\u5b83\u5176\u5be6\u5c31\u662f ML (\u6a5f\u5668\u5b78\u7fd2) \u8207 DevOps (\u958b\u767c\u8207\u7dad\u904b) 
\u7684\u7d50\u5408\u3002\u5982\u4e0b\u5716\u6240\u793a\u5f9e\u8a13\u7df4\u6a21\u578b\u5230\u6b63\u5f0f\u90e8\u7f72\u4e2d\u9593\u9084\u6709\u8a31\u591a\u4e8b\u60c5\u8981\u8655\u7406\uff0c\u800c\u6a21\u578b\u4e0a\u7dda\u5f8c\u9084\u662f\u8981\u6301\u7e8c\u76e3\u63a7\u4e26\u6536\u96c6\u65b0\u7684\u5834\u57df\u8cc7\u6599\u3002\u6700\u5f8c\u5c07\u8cc7\u6599\u6536\u96c6\u5230\u4e00\u5b9a\u7a0b\u5ea6\uff0c\u53c8\u56de\u5230\u9031\u671f\u7684\u7b2c\u4e00\u6b65\u91cd\u65b0\u8a13\u7df4\u65b0\u6a21\u578b\u3002\u81f3\u65bc\u6a21\u578b\u8a72\u5982\u4f55\u91cd\u65b0\u8a13\u7df4\u4e26\u4fdd\u6301\u8cc7\u6599\u7684\u96b1\u79c1\u6027\u5c31\u662f\u53e6\u4e00\u9580\u8b70\u984c\u3002\u9019\u6642\u5019\u6211\u5011\u5c31\u80fd\u63a1\u7528\u4e00\u500b\u6280\u8853\u53eb\u505a Federated Learning (\u806f\u5408\u5b78\u7fd2) \u60f3\u8fa6\u6cd5\u8655\u7406\u9019\u985e\u7684\u4e8b\u60c5\u3002 \u4f46\u662f\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u8a13\u7df4\u4e00\u500b\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\uff0c\u5728 MLOps \u7684\u9031\u671f\u4e2d\u50c5\u626e\u6f14\u5c0f\u5c0f\u7684\u4e00\u584a\u89d2\u8272\u3002\u4e0b\u5716\u662f\u4e00\u500b\u8a13\u7df4\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u7684\u57fa\u672c\u6d41\u7a0b\uff0c\u4e2d\u9593\u6a58\u8272\u7684\u90e8\u5206\u5c31\u662f AutoML \u53ef\u4ee5\u5e6b\u52a9\u6211\u5011\u7684\u4e8b\u3002\u5f9e\u8cc7\u6599\u524d\u8655\u7406\u3001\u8a13\u7df4\u6a21\u578b\u5230\u8a55\u4f30\u6a21\u578b\u9700\u8981\u4e0d\u65b7\u5730\u7684\u9032\u884c\u8a66\u9a57\uff0c\u4e26\u4e14\u5617\u8a66\u5404\u7a2e\u4e0d\u540c\u7684\u6a21\u578b\u6f14\u7b97\u6cd5\u8207\u6a21\u578b\u8d85\u53c3\u6578\u3002\u9664\u6b64\u4e4b\u5916\u9084\u6709\u8cc7\u6599\u524d\u8655\u7406\u8207\u7279\u5fb5\u5de5\u7a0b\uff0c\u90fd\u53ef\u4ee5\u900f\u904e AutoML \u81ea\u52d5\u5316\u7684\u8a13\u7df4\u627e\u5230\u4e00\u500b\u6eff\u610f\u7684\u6a21\u578b\u3002","title":"AutoML 
\u80fd\u5e6b\u52a9\u591a\u5c11\u4e8b\u60c5"},{"location":"19.AutoML/#_2","text":"\u6a5f\u5668\u5b78\u7fd2\u81ea\u52d5\u5316\u7684\u56f0\u96e3\u9ede\u5728\u65bc\u8cc7\u6599\u6e05\u6d17\u8207\u7279\u5fb5\u5de5\u7a0b\u6280\u5de7\u3002\u4e00\u500b\u597d\u7684\u7279\u5fb5\u8868\u9054\u53ef\u4ee5\u8b93\u6a21\u578b\u5feb\u901f\u5730\u6293\u5230\u95dc\u9375\u56e0\u5b50\uff0c\u4e26\u8b93\u6a21\u578b\u9810\u6e2c\u80fd\u529b\u63d0\u5347\u3002\u6176\u5e78\u7684\u662f\u6a21\u578b\u6311\u9078\u548c\u8d85\u53c3\u6578\u8abf\u6574\u5df2\u7d93\u6709\u6bd4\u8f03\u6210\u719f\u7684\u65b9\u6cd5\u53ef\u4ee5\u5354\u52a9\u6211\u5011\u6709\u6548\u7684\u641c\u5c0b\u3002 Grid Search \u7db2\u683c\u641c\u7d22/\u7aae\u8209\u641c\u7d22 Random Search \u96a8\u6a5f\u641c\u7d22 Bayesian Optimization \u8c9d\u8449\u65af\u512a\u5316","title":"\u8d85\u53c3\u6578\u8abf\u53c3\u65b9\u6cd5"},{"location":"19.AutoML/#grid-search","text":"Grid Search (\u7db2\u683c\u641c\u7d22) \u53c8\u7a31\u7aae\u8209\u641c\u7d22\u3002\u5b83\u7684\u641c\u7d22\u65b9\u5f0f\u662f\u5728\u6240\u6709\u53ef\u80fd\u7684\u53c3\u6578\u4e2d\uff0c\u900f\u904e\u6392\u5217\u7d44\u5408\u5617\u8a66\u6bcf\u4e00\u7a2e\u53ef\u80fd\u6027\u3002\u4e26\u5c07\u8868\u73fe\u6700\u597d\u7684\u53c3\u6578\u4f5c\u70ba\u6700\u7d42\u7684\u8d85\u53c3\u6578\u641c\u5c0b\u7d50\u679c\u3002\u4ed6\u7684\u7f3a\u9ede\u5c31\u662f\u7576\u6709\u8a31\u591a\u8d85\u53c3\u6578\u8981\u5c0b\u627e\u6642\uff0c\u4ed6\u7684\u6392\u5217\u7d44\u5408\u5c31\u6703\u8b8a\u5f97\u975e\u5e38\u591a\uff0c\u5c0e\u81f4\u641c\u7d22\u7684\u6642\u9593\u8b8a\u9577\u82b1\u8cbb\u7684\u8cc7\u6e90\u4e5f\u8b8a\u5927\u3002\u56e0\u6b64\u9019\u7a2e\u66b4\u529b\u5f0f\u7684\u641c\u7d22\u65b9\u6cd5\u9069\u5408\u5728\u5c0f\u7684\u8cc7\u6599\u96c6\u4e0a\u88ab\u63a1\u7528\u3002\u7136\u800c\u5728 Sklearn \u5957\u4ef6\u4e2d\u6709\u63d0\u4f9b GridSearchCV 
\u65b9\u6cd5\uff0c\u4f7f\u7528\u8005\u53ef\u4ee5\u81ea\u5df1\u8a2d\u5b9a\u53c3\u6578\u5217\u8868\uff0c\u4e26\u900f\u904e\u6240\u6709\u53ef\u80fd\u7684\u53c3\u6578\u7d44\u5408\u4e00\u500b\u4e00\u500b\u5617\u8a66\u627e\u5230\u6700\u5408\u9069\u7684\u53c3\u6578\u3002 from sklearn import svm , datasets from sklearn.model_selection import GridSearchCV # \u8f09\u5165\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6 iris = datasets . load_iris () # \u8a2d\u5b9a\u60f3\u8981\u7684\u641c\u7d22\u53c3\u6578\u4e26\u7d66\u4e88\u5019\u9078\u503c parameters = { 'kernel' :( 'linear' , 'rbf' ), 'C' :[ 1 , 10 ]} # \u5efa\u7acb SVC \u5206\u985e\u5668 svc = svm . SVC () # \u7db2\u683c\u641c\u7d22\u6240\u6709\u53ef\u80fd\u7684\u7d44\u5408(2*2)\u5171\u56db\u7a2e clf = GridSearchCV ( svc , parameters ) # \u64ec\u5408\u6578\u64da\u4e26\u56de\u50b3\u6700\u4f73\u6a21\u578b clf . fit ( iris . data , iris . target ) \u641c\u7d22\u7d50\u675f\u5f8c\u4e5f\u80fd\u5920\u900f\u904e cv_results_ \u67e5\u770b\u6240\u6709\u7d44\u5408\u7684\u8d85\u53c3\u6578\u6240\u5c0d\u61c9\u7684\u8a13\u7df4\u7d50\u679c\u3002 clf . 
cv_results_","title":"Grid Search"},{"location":"19.AutoML/#random-search","text":"Random Search (\u96a8\u6a5f\u641c\u7d22) \u6309\u7167\u5b57\u9762\u4e0a\u7684\u610f\u601d\u5c31\u662f\u5728\u6240\u6709\u53ef\u80fd\u7684\u5019\u9078\u53c3\u6578\u4e2d\u96a8\u6a5f\u6311\u9078\u4e00\u500b\u6578\u503c\u4e26\u5617\u8a66\u3002\u5982\u679c\u9700\u8981\u8abf\u7684\u53c3\u6578\u8f03\u591a\u7684\u6642\u5019\uff0c\u4f7f\u7528\u96a8\u6a5f\u641c\u7d22\u53ef\u4ee5\u964d\u4f4e\u641c\u7d22\u6642\u9593\uff0c\u540c\u6642\u53c8\u80fd\u78ba\u4fdd\u4e00\u5b9a\u7684\u6a21\u578b\u6e96\u78ba\u6027\u3002\u5728 Sklearn \u5957\u4ef6\u4e2d\u4e5f\u6709\u63d0\u4f9b RandomizedSearchCV \u65b9\u6cd5\u53ef\u4ee5\u547c\u53eb\uff0c\u8207\u7db2\u683c\u641c\u7d22\u7684\u5dee\u5225\u5728\u65bc\u4f7f\u7528\u8005\u53ef\u4ee5\u5c07\u6b32\u641c\u5c0b\u7684\u8d85\u53c3\u6578\u8a2d\u5b9a\u4e00\u500b\u671f\u671b\u7684\u7bc4\u570d\u3002\u8a72\u65b9\u6cd5\u6703\u5728\u6b64\u7bc4\u570d\u4e2d\u96a8\u6a5f\u62bd\u4e00\u500b\u6578\u503c\u4e26\u9032\u884c\u6a21\u578b\u8a13\u7df4\u4e26\u9a57\u8b49\u6a21\u578b\u3002\u4e26\u627e\u51fa\u6240\u6709\u96a8\u6a5f\u7d44\u5408\u4e2d\u8868\u73fe\u6700\u597d\u7684\u4e00\u7d44\u8d85\u53c3\u6578\u3002 from sklearn.datasets import load_iris from sklearn.linear_model import LogisticRegression from sklearn.model_selection import RandomizedSearchCV from scipy.stats import uniform # \u8f09\u5165\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6 iris = load_iris () # \u5efa\u7acb\u908f\u8f2f\u8ff4\u6b78\u6a21\u578b logistic = LogisticRegression ( solver = 'saga' , tol = 1e-2 , max_iter = 200 , random_state = 0 ) # \u8a2d\u5b9a\u6b32\u641c\u5c0b\u7684\u8d85\u53c3\u6578\u4e26\u7d66\u4e88\u4e00\u500b\u671f\u671b\u7684\u7bc4\u570d distributions = dict ( C = uniform ( loc = 0 , scale = 4 ), penalty = [ 'l2' , 'l1' ]) # \u96a8\u6a5f\u641c\u7d22\u9810\u8a2d n_iter=10 clf = RandomizedSearchCV ( logistic , distributions , random_state = 0 , n_iter = 10 ) # 
\u64ec\u5408\u6578\u64da\u4e26\u56de\u50b3\u6700\u4f73\u6a21\u578b search = clf . fit ( iris . data , iris . target ) search . best_params_","title":"Random Search"},{"location":"19.AutoML/#bayesian-optimization","text":"Bayesian Optimization (\u8c9d\u8449\u65af\u512a\u5316) \u76ee\u6a19\u662f\u8981\u5728\u6700\u5c11\u7684\u8a66\u9a57\u4e0b\u5c0b\u627e\u4e00\u7d44\u6700\u4f73\u7684\u8d85\u53c3\u6578\u4f7f\u5f97\u932f\u8aa4\u7387\u80fd\u5920\u8d8a\u4f4e\u8d8a\u597d\u3002\u7531\u65bc\u6211\u5011\u6240\u6536\u96c6\u5230\u7684\u8cc7\u6599\u7121\u5f9e\u5f97\u77e5\u8a72\u6a21\u578b\u7684\u76ee\u6a19\u51fd\u6578\u662f\u9577\u600e\u6a23\uff0c\u56e0\u6b64\u6a5f\u5668\u5b78\u7fd2\u7684\u76ee\u7684\u5c31\u662f\u8981\u5f9e\u9019\u4e9b\u8cc7\u6599\u4e2d\u53bb\u64ec\u5408\u4e00\u500b\u51fd\u6578\uff0c\u76ee\u6a19\u662f\u7d66\u4e88\u4e00\u7b46\u8f38\u5165 X \u8a72\u51fd\u6578\u7684\u8f38\u51fa\u8981\u8207\u771f\u5be6\u7684\u7b54\u6848\u8d8a\u63a5\u8fd1\u8d8a\u597d\u3002\u900f\u904e\u4ee3\u7406\u512a\u5316 (surrogate optimization) \u4f7f\u7528\u4e00\u500b\u4ee3\u7406\u51fd\u6578\u4f86\u4f30\u8a08\u76ee\u6a19\u51fd\u6578\u3002\u7c21\u55ae\u4f86\u8aaa\u4ee3\u7406\u51fd\u6578\u662f\u6307\u76ee\u6a19\u51fd\u6578\u7684\u4e00\u7a2e\u8fd1\u4f3c\uff0c\u6b64\u5916\u4ee3\u7406\u51fd\u6578\u53ef\u57fa\u65bc\u53d6\u6a23\u5f97\u5230\u7684\u8cc7\u6599\u9ede\u88ab\u69cb\u5efa\u51fa\u4f86\u3002 
\u4ee3\u7406\u51fd\u6578\u7684\u76ee\u7684\u662f\u5728\u7d66\u5b9a\u4e00\u7d44\u7279\u5b9a\u7684\u5019\u9078\u8d85\u53c3\u6578\u7684\u60c5\u6cc1\u4e0b\u5feb\u901f\u4f30\u8a08\u5be6\u969b\u6a21\u578b\u7684\u932f\u8aa4\u7387\u3002\u900f\u904e\u9019\u7a2e\u65b9\u5f0f\u53ef\u4ee5\u5feb\u901f\u6c7a\u5b9a\u8a72\u7d44\u8d85\u53c3\u6578\u662f\u5426\u53ef\u4ee5\u88ab\u62ff\u4f86\u5be6\u969b\u8a13\u7df4\u6a21\u578b\u3002\u96a8\u8457\u8a66\u9a57\u6b21\u6578\u7684\u589e\u52a0\uff0c\u4ee3\u7406\u51fd\u6578\u96a8\u8457\u5148\u524d\u7684\u8a66\u9a57\u7d50\u679c\u800c\u66f4\u65b0\u6539\u9032\uff0c\u4e26\u958b\u59cb\u63a8\u85a6\u66f4\u597d\u7684\u5019\u9078\u8d85\u53c3\u6578\u3002 Auto-sklearn \u5c31\u662f\u4e00\u500b\u900f\u904e\u8c9d\u8449\u65af\u512a\u5316\u4f86\u5c0b\u627e\u6700\u4f73\u8d85\u53c3\u6578\u7684\u4e00\u500b\u5de5\u5177\u3002\u540c\u6642\u5b83\u4e5f\u80fd\u641c\u7d22\u5728 Sklearn \u4e2d\u6240\u6709\u53ef\u80fd\u7684\u7b97\u6cd5\uff0c\u4e26\u70ba\u4f60\u63a8\u85a6\u4e00\u500b\u5408\u9069\u7684\u6a21\u578b\u8207\u8cc7\u6599\u524d\u8655\u7406\u65b9\u5f0f\u3002\u660e\u5929\u6211\u5011\u5c31\u4f86\u4e00\u63a2\u7a76\u7adf\u8a72\u5957\u4ef6\u80cc\u5f8c\u7684\u795e\u79d8\u539f\u7406\u4ee5\u53ca\u7a0b\u5f0f\u5be6\u4f5c\u5427\uff01","title":"Bayesian Optimization"},{"location":"19.AutoML/#reference","text":"automl.org Sklearn \u5b98\u65b9\u6587\u4ef6 GridSearchCV Sklearn \u5b98\u65b9\u6587\u4ef6 RandomizedSearchCV \u7528\u7c21\u55ae\u8853\u8a9e\u8b93\u4f60\u770b\u5230\u8c9d\u8449\u65af\u512a\u5316\u4e4b\u7f8e \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/","text":"[Day 2] \u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u4eba\u5de5\u667a\u6167\u7684\u6f14\u9032 \u4eba\u5de5\u667a\u6167\u7684\u5206\u7d1a \u6a5f\u5668\u662f\u5982\u4f55\u5b78\u7fd2\u7684 
\u4eba\u5de5\u667a\u6167\u7684\u6f14\u9032 AI \u8207\u6a5f\u5668\u5b78\u7fd2\u6280\u8853\u6b63\u5728\u84ec\u52c3\u767c\u5c55\u4e2d\uff0c\u4f60\u80fd\u60f3\u50cf\u4eba\u5de5\u667a\u6167\u66fe\u88ab\u8a8d\u70ba\u662f\u4e00\u500b\u6beb\u7121\u51fa\u8def\u7684\u9818\u57df\u55ce? \u5f9e\u4eba\u5de5\u667a\u6167\u7684\u6642\u9593\u8ef8\u4f86\u770b\u53ef\u4ee5\u5206\u70ba\u4e09\u500b\u71b1\u6f6e\u3002\u7b2c\u4e00\u6b21\u71b1\u6f6e\uff081950~1960\u5e74\uff09\uff0c\u7531\u65bc\u65e9\u671f\u7684\u96fb\u8166\u786c\u9ad4\u8cc7\u6e90\u7684\u4e0d\u8db3\u5c0e\u81f4\u8907\u96dc\u7684\u554f\u984c\u7121\u6cd5\u8f15\u6613\u7684\u89e3\u6c7a\u3002\u7b2c\u4e8c\u6b21\u71b1\u6f6e\uff081980~1990\u5e74\uff09\u5c07\u5e36\u6709\u77e5\u8b58\u672c\u9ad4\u7684\u4ee3\u7406\u4eba\u653e\u5165\u6a5f\u5668\u4eba\u4e2d\u4f7f\u5177\u6709\u667a\u6167\uff0c\u4e5f\u5c31\u662f\u6240\u8b02\u7684\u5c08\u5bb6\u7cfb\u7d71\u3002\u4f46\u4eba\u985e\u8cc7\u6e90\u6709\u9650\u4e0d\u53ef\u80fd\u628a\u6240\u6709\u7684\u77e5\u8b58\u90fd\u9010\u4e00\u5730\u8f38\u5165\u5230\u96fb\u8166\u3002\u56e0\u6b64\u5927\u5bb6\u958b\u59cb\u601d\u8003\u6a5f\u5668\u662f\u5426\u80fd\u5920\u8b93\u4ed6\u81ea\u5df1\u53bb\u5b78\u7fd2\uff1f\u800c\u4e0d\u662f\u4eba\u985e\u4e00\u5473\u7684\u9935\u5165\u9019\u4e9b\u77e5\u8b58\u3002\u7b2c\u4e09\u6b21\u71b1\u6f6e\uff082000\u5e74~\u73fe\u5728\uff09\u7531\u65bc CPU\u3001GPU \u4ee5\u53ca\u96f2\u7aef\u904b\u7b97\u8cc7\u6e90\u666e\u53ca\uff0c\u65e9\u671f\u8907\u96dc\u96e3\u89e3\u7684\u6f14\u7b97\u6cd5\u9678\u7e8c\u53ef\u4ee5\u900f\u904e\u8d85\u7d1a\u96fb\u8166\u4f86\u89e3\u6c7a\u3002\u7576\u624b\u908a\u6709\u4e86\u5927\u91cf\u7684\u6578\u64da\u5c31\u80fd\u62ff\u4f86\u6a5f\u5668\u5b78\u7fd2\uff0c\u56e0\u6b64\u5927\u5bb6\u8e0f\u5165\u4e86\u5927\u6578\u64da\u4ee5\u53ca\u6df1\u5ea6\u5b78\u7fd2\u7684\u6642\u4ee3\u3002\u6642\u9593\u4e0d\u65b7\u7684\u5f80\u524d\u8d70\uff0c\u4f60\u80fd\u60f3\u50cf\u672a\u4f86\u7684 AI \u5728\u4e16\u754c\u4e0a\u662f\u626e\u6f14\u4ec0\u9ebc\u6a23\u7684\u89d2\u8272\u55ce\uff1f 
\u4eba\u5de5\u667a\u6167\u7684\u5206\u7d1a \u73fe\u4eca\u4eba\u5de5\u667a\u6167\u8207\u6211\u5011\u751f\u6d3b\u7121\u6240\u4e0d\u5728\uff0c\u4f8b\u5982\u6211\u5011\u53ea\u8981\u5c0d\u8457\u624b\u6a5f\u558a\u4e00\u8072\u300cHey Siri !\u300d\u860b\u679c\u624b\u6a5f\u7684\u8a9e\u97f3\u52a9\u7406\u5c31\u80fd\u5e6b\u4f60\u6253\u7406\u597d\u5927\u5c0f\u4e8b\u3002\u6216\u8005\u6b63\u5728\u8d85\u5e02\u8cfc\u7269\u7684\u4f60\u6b63\u5728\u70ba\u8cfc\u8cb7\u54ea\u4e00\u9805\u5546\u54c1\u7169\u60f1\u6642\uff0c\u63a8\u85a6\u7cfb\u7d71\u6a5f\u5668\u4eba\u80fd\u5920\u5373\u6642\u5730\u70ba\u4f60\u505a\u5546\u54c1\u63a8\u85a6\u3002\u770b\u4f3c\u8457\u7c21\u55ae\u7684\u52d5\u4f5c\uff0c\u4f46\u4eba\u5de5\u667a\u6167\u7684\u60c5\u666f\u5728\u4f60\u6211\u65e5\u5e38\u751f\u6d3b\u4e2d\u606f\u606f\u76f8\u95dc\u3002\u4eba\u5de5\u667a\u6167\u4f9d\u7167\u6a5f\u5668\u80fd\u5920\u8655\u7406\u8207\u5224\u65b7\u7684\u80fd\u529b\u5340\u5206\u70ba\u56db\u500b\u5206\u7d1a\uff0c\u5206\u5225\u70ba\u81ea\u52d5\u63a7\u5236\u3001\u63a2\u7d22\u63a8\u8ad6\u3001\u6a5f\u5668\u5b78\u7fd2\u3001\u6df1\u5ea6\u5b78\u7fd2\uff1a \u7b2c\u4e00\u7d1a\u4eba\u5de5\u667a\u6167\uff1a\u81ea\u52d5\u63a7\u5236 
\u6a5f\u5668\u542b\u6709\u81ea\u52d5\u63a7\u5236\u7684\u529f\u80fd\uff0c\u4e26\u4e14\u7d93\u7531\u611f\u6e2c\u5668\u5075\u6e2c\u74b0\u5883\u7684\u8cc7\u8a0a\u3002\u4f8b\u5982\u900f\u904e\u6eab\u5ea6\u611f\u6e2c\u5668\u4f86\u5075\u6e2c\u7522\u7dda\u7684\u99ac\u9054\u662f\u5426\u904e\u71b1\uff0c\u4e26\u9054\u5230\u505c\u6b62\u904b\u8f49\u6548\u679c\u3002\u6216\u662f\u51b7\u6c23\u4f4e\u65bc20\u5ea6\u6642\u5c31\u9032\u5165\u5f85\u6a5f\u6a21\u5f0f\u2026\u2026\u7b49\u3002\u56e0\u6b64\u7a0b\u5f0f\u8a2d\u8a08\u5e2b\u5fc5\u9808\u5148\u628a\u6240\u6709\u53ef\u80fd\u7684\u60c5\u6cc1\u90fd\u8003\u616e\u9032\u53bb\u624d\u80fd\u5beb\u51fa\u63a7\u5236\u7a0b\u5f0f\u3002\u9019\u5c31\u884d\u4f38\u51fa\u4e00\u4e9b\u554f\u984c\uff0c\u50cf\u662f\u9748\u6d3b\u5ea6\u4e0d\u9ad8\uff0c\u4e14\u9700\u8981\u6709\u7d93\u9a57\u7684\u5c08\u5bb6\u4ecb\u5165\u624d\u80fd\u5b8c\u6210\u3002 \u7b2c\u4e8c\u7d1a\u4eba\u5de5\u667a\u6167\uff1a\u63a2\u7d22\u63a8\u8ad6 \u7b2c\u4e8c\u7d1a\u9010\u6f38\u958b\u59cb\u5f37\u8abf\u908f\u8f2f\u63a8\u7406\uff0c\u53ef\u4ee5\u8aaa\u662f\u88dc\u8db3\u7b2c\u4e00\u7d1a\u7684\u554f\u984c\u3002\u900f\u904e\u5c07\u77e5\u8b58\u7d44\u7e54\u6210\u77e5\u8b58\u672c\u9ad4\u4e26\u8b93\u6a5f\u5668\u5f9e\u73fe\u6709\u7684\u8cc7\u8a0a\u4e2d\u53bb\u63a8\u7406\u3002\u5178\u578b\u7684\u4f8b\u5b50\u5c31\u662f\u5c08\u5bb6\u7cfb\u7d71\uff0c\u5b83\u662f\u900f\u904e\u7279\u5b9a\u9818\u57df\u7684\u5c08\u5bb6\u8a02\u5b9a\u51fa\u4e00\u5957\u77e5\u8b58\u5eab\u8207\u898f\u5247\u5eab\uff0c\u4e26\u7522\u751f\u5927\u91cf\u8f38\u5165\u8207\u8f38\u51fa\u8cc7\u6599\u7684\u6392\u5217\u7d44\u5408\u4f86\u89e3\u6c7a\u65e5\u5e38\u751f\u6d3b\u4e2d\u7684\u554f\u984c\u3002\u7576\u7136\u6240\u8b02\u7684\u5c08\u5bb6\u7cfb\u7d71\u5c31\u5fc5\u9808\u9080\u8acb\u9818\u57df\u7684\u5c08\u5bb6\u70ba\u7cfb\u7d71\u91cf\u8eab\u6253\u9020\u4e00\u5957\u7368\u4e00\u7121\u4e8c\u7684\u898f\u5247\u3002\u7136\u800c\u6bcf\u500b\u4eba\u7684\u89c0\u9ede\u53ef\u80fd\u90fd\u4e0d\u540c\uff0c\u56e0\u6b64\u4e0d\u540c\u5c08\u5bb6\u9593\u62
40\u5236\u5b9a\u7684\u898f\u5247\u53ef\u80fd\u90fd\u4e0d\u592a\u4e00\u6a23\u3002 \u7b2c\u4e09\u7d1a\u4eba\u5de5\u667a\u6167\uff1a\u6a5f\u5668\u5b78\u7fd2 \u6a5f\u5668\u53ef\u4ee5\u6839\u64da\u8cc7\u6599\u5b78\u7fd2\u5982\u4f55\u5c07\u8f38\u5165\u8207\u8f38\u51fa\u8cc7\u6599\u7522\u751f\u95dc\u806f\u3002\u6a5f\u5668\u5b78\u7fd2\u662f\u4e00\u7a2e\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\uff0c\u4e26\u5f9e\u8cc7\u6599\u4e2d\u53bb\u5b78\u7fd2\u4e26\u627e\u51fa\u554f\u984c\u7684\u89e3\u6c7a\u65b9\u6cd5\u3002\u5176\u61c9\u7528\u5305\u62ec\u641c\u5c0b\u5f15\u64ce\u3001\u5927\u6578\u64da\u5206\u6790\u7b49\u3002\u6211\u5011\u4f9d\u64da\u8cc7\u6599\u8207\u5b78\u7fd2\u65b9\u5f0f\u53ef\u5927\u81f4\u5206\u70ba\u76e3\u7763\u5f0f\u5b78\u7fd2\u3001\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u3001\u589e\u5f37\u5f0f\u5b78\u7fd2\uff0c\u6b64\u5916\u81ea\u76e3\u7763\u5b78\u7fd2\u9019\u500b\u540d\u8a5e\u6700\u8fd1\u4e5f\u71b1\u70c8\u7684\u8a0e\u8ad6\u4e2d\u3002 \u7b2c\u56db\u7d1a\u4eba\u5de5\u667a\u6167\uff1a\u6df1\u5ea6\u5b78\u7fd2 \u6df1\u5ea6\u5b78\u7fd2\u662f\u4e00\u7a2e\u6a5f\u5668\u5b78\u7fd2\u7684\u65b9\u6cd5\u3002\u5b83\u85c9\u7531\u6a21\u4eff\u4eba\u985e\u5927\u8166\u795e\u7d93\u5143\u7684\u7d50\u69cb\uff0c\u5b9a\u7fa9\u89e3\u6c7a\u554f\u984c\u7684\u51fd\u5f0f\u3002\u6240\u8b02\u6df1\u5ea6\u5b78\u7fd2\u662f\u4e00\u7a2e\u5177\u6709\u6df1\u5ea6\u591a\u5c64\u7684\u795e\u7d93\u7db2\u8def\u3002\u6a5f\u5668\u53ef\u4ee5\u81ea\u884c\u5b78\u7fd2\u4e26\u4e14\u7406\u89e3\u6a5f\u5668\u5b78\u7fd2\u6642\u7528\u4ee5\u8868\u793a\u8cc7\u6599\u7684\u300c\u7279\u5fb5\u300d\uff0c\u56e0\u6b64\u53c8\u7a31\u70ba\u300c\u7279\u5fb5\u8868\u9054\u5b78\u7fd2\u300d\uff0c\u5176\u61c9\u7528\u5305\u62ec\uff1a\u5f71\u50cf\u5206\u985e\u3001\u6a5f\u5668\u7ffb\u8b6f...\u7b49\u3002 \u6a5f\u5668\u5982\u4f55\u5b78\u7fd2\uff1f \u76e3\u7763\u5f0f\u5b78\u7fd2 (Supervised Learning) 
\u6240\u8b02\u7684\u76e3\u7763\u5f0f\u5b78\u7fd2\u662f\u7d66\u8a31\u591a\u8cc7\u6599\u4e26\u7d66\u8207\u7b54\u6848\uff0c\u900f\u904e\u640d\u5931\u51fd\u6578\u8a08\u7b97\u4f86\u627e\u51fa\u4e00\u500b\u6700\u4f73\u89e3\u3002\u8209\u4e00\u500b\u7c21\u55ae\u7684\u4f8b\u5b50\uff0c\u6bd4\u5982\u7d66\u6a5f\u5668\u5404\u770b\u4e86 1000 \u5f35\u8c93\u548c\u72d7\u7684\u7167\u7247\u5f8c\u518d\u8a62\u554f\u6a5f\u5668\u65b0\u7684\u4e00\u5f35\u7167\u7247\u4e2d\u662f\u8c93\u9084\u662f\u72d7\u3002\u4e00\u76f4\u4e0d\u65b7\u7684\u8fed\u4ee3\u8a13\u7df4\u4e26\u5f9e\u932f\u8aa4\u4e2d\u53bb\u5b78\u7fd2\uff0c\u6700\u7d42\u6a5f\u5668\u80fd\u6210\u529f\u7684\u5206\u985e\u4e86\u3002 \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2 (Unsupervised Learning) \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u53ea\u7d66\u5b9a\u7279\u5fb5\uff0c\u6a5f\u5668\u6703\u60f3\u8fa6\u6cd5\u5f9e\u4e2d\u627e\u51fa\u898f\u5f8b\u3002\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u6700\u5e38\u898b\u7684\u65b9\u6cd5\u5c31\u662f\u96c6\u7fa4\u5206\u6790(Cluster Analysis)\uff0c\u76ee\u6a19\u662f\u6839\u64da\u7279\u5fb5\u5c07\u8cc7\u6599\u6a23\u672c\u5206\u70ba\u5e7e\u7fa4\u3002\u7c21\u55ae\u4f86\u8aaa\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u5c31\u662f\u7d66\u8a31\u591a\u8cc7\u6599\u4f46\u4e0d\u7d66\u4e88\u7b54\u6848\uff0c\u6a21\u578b\u6703\u5f9e\u8cc7\u6599\u4e2d\u81ea\u5df1\u53bb\u627e\u51fa\u95dc\u4fc2\u3002\u900f\u904e\u5206\u7fa4\u6f14\u7b97\u6cd5\u4f86\u8a08\u7b97\u8cc7\u6599\u8207\u8cc7\u6599\u9593\u7684\u76f8\u4f3c\u7a0b\u5ea6\u8207\u8ddd\u96e2\u3002 \u534a\u76e3\u7763\u5f0f\u5b78\u7fd2 (Semi-Supervised Learning) 
\u4ecb\u65bc\u76e3\u7763\u5f0f\u5b78\u7fd2\u8207\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u4e4b\u9593\u3002\u5728\u73fe\u5be6\u751f\u6d3b\u4e2d\uff0c\u672a\u6a19\u8a18\u6a23\u672c\u591a\u3001\u6709\u6a19\u8a18\u6a23\u672c\u5c11\u662f\u4e00\u500b\u6bd4\u8f03\u666e\u904d\u7684\u73fe\u8c61\uff0c\u5982\u4f55\u5229\u7528\u597d\u672a\u6a19\u8a18\u6a23\u672c\u4f86\u63d0\u5347\u6a21\u578b\u6cdb\u5316\u80fd\u529b\uff0c\u5c31\u662f\u534a\u76e3\u7763\u5f0f\u5b78\u7fd2\u7814\u7a76\u7684\u91cd\u9ede\u3002\u534a\u76e3\u7763\u5f0f\u5b78\u7fd2\u7684\u61c9\u7528\u4e3b\u8981\u5728\u65bc\u6536\u96c6\u8cc7\u6599\u5f88\u7c21\u55ae\uff0c\u4f46\u6a19\u8a18\u7684\u8cc7\u6599\u592a\u5c11\u4e86\uff0c\u6211\u5011\u5e0c\u671b\u53ef\u4ee5\u81ea\u52d5\u6a19\u8a18\u8cc7\u6599\u3002 \u5f37\u5316\u5f0f\u5b78\u7fd2 (Reinforcement Learning) \u5728\u5f37\u5316\u5f0f\u5b78\u7fd2\u4e2d\u6a5f\u5668\u6703\u9032\u884c\u4e00\u7cfb\u5217\u7684\u52d5\u4f5c\uff0c\u800c\u6bcf\u505a\u4e00\u500b\u52d5\u4f5c\u3001\u74b0\u5883\u90fd\u6703\u8ddf\u8457\u767c\u751f\u8b8a\u5316\u3002\u82e5\u74b0\u5883\u7684\u8b8a\u5316\u662f\u96e2\u76ee\u6a19\u66f4\u63a5\u8fd1\uff0c\u6211\u5011\u5c31\u6703\u7d66\u4e88\u4e00\u500b\u6b63\u5411\u53cd\u994b\u3002\u82e5\u96e2\u76ee\u6a19\u66f4\u9060\uff0c\u5247\u7d66\u4e88\u8ca0\u5411\u53cd\u994b\u3002\u6a5f\u5668\u900f\u904e\u4e0d\u65b7\u7684\u5f9e\u932f\u8aa4\u4e2d\u53bb\u5b78\u7fd2\uff0c\u6700\u7d42\u5b78\u5230\u4e86\u5982\u4f55\u53bb\u89e3\u6c7a\u4e00\u4ef6\u4e8b\u60c5\u3002 \u81ea\u76e3\u7763\u5b78\u7fd2 (Self-Supervised Learning) \u81ea\u76e3\u7763\u5b78\u7fd2\u662f\u7531\u5377\u7a4d\u795e\u7d93\u4e4b\u7236 Yann LeCun \u65bc 2019 
\u5e74\u6240\u63d0\u51fa\u4f86\u7684\u4e00\u7a2e\u5b78\u7fd2\u6a5f\u5236\u3002\u6b64\u5b78\u7fd2\u6a5f\u5236\u6a21\u4eff\u4eba\u985e\u7684\u5b78\u7fd2\u884c\u70ba\uff0c\u900f\u904e\u7576\u524d\u4efb\u52d9\u89c0\u5bdf\u6240\u5f97\u5230\u7684\u7279\u5fb5\uff0c\u4e26\u8a13\u7df4\u4e00\u500b\u76ee\u6a19\u4efb\u52d9\u7684\u6a21\u578b\u3002\u800c\u4e14\u5b78\u7fd2\u904e\u7a0b\u4e2d\u4e26\u4e0d\u4ef0\u8cf4\u4eba\u985e\u7d66\u5b9a\u7684\u6a19\u7c64\u3002\u7c21\u55ae\u4f86\u8aaa\u8a13\u7df4\u904e\u7a0b\u662f\u62ff\u4e00\u500b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u900f\u904e\u975e\u76e3\u7763\u5f0f\u6280\u5de7 pre-text task \u8a13\u7df4\u597d\u6a21\u578b\uff0c\u8a13\u7df4\u5b8c\u6210\u5f8c\u518d\u63a5\u5230\u4e0b\u6e38\u4efb\u52d9\u505a\u6700\u5f8c\u7684\u6a21\u578b\u5fae\u8abf (fine tune)\u3002 \u5b78 AI \u8a72\u7528\u54ea\u7a2e\u7a0b\u5f0f\u8a9e\u8a00\uff1f Python \u662f\u8fd1\u5e74\u4f86\u9ad8\u901f\u6210\u9577\u4e26\u4e14\u9010\u6f38\u666e\u53ca\u7684\u7a0b\u5f0f\u8a9e\u8a00\uff0c\u4e5f\u53ef\u4ee5\u8aaa\u662f\u6700\u5bb9\u6613\u4e0a\u624b\u7684\u7a0b\u5f0f\u8a9e\u8a00\u4e4b\u4e00\u3002\u4e3b\u8981\u5728\u65bc\u5b83\u7684\u8a9e\u6cd5\u662f\u7c21\u5316\u800c\u4e0d\u8907\u96dc\u7684\uff0c\u540c\u6642\u5f37\u8abf\u7a0b\u5f0f\u78bc\u7684\u53ef\u8b80\u6027\u56e0\u6b64\u66f4\u80fd\u8cbc\u8fd1\u7a0b\u5f0f\u8a2d\u8a08\u8005\u7684\u601d\u7dad\u3002\u7576\u7136\u4e5f\u6709\u4e9b\u4eba\u4f7f\u7528 R \u8a9e\u8a00\u9032\u884c\u7d71\u8a08\u5206\u6790\u3001\u7e6a\u5716\u4ee5\u53ca\u8cc7\u6599\u63a2\u52d8\u751a\u81f3\u5efa\u6a21\u3002\u5982\u679c\u4f60\u6b63\u5728\u7336\u8c6b\u8981\u5165\u5751\u54ea\u4e00\u985e\u7a0b\u5f0f\u8a9e\u8a00\uff0c\u7b46\u8005\u9019\u88e1\u63a8\u85a6 Python \u7a0b\u5f0f\u8a9e\u8a00\u3002\u56e0\u70ba Python \u4e0d\u50c5\u53ef\u4ee5\u9032\u884c\u8cc7\u6599\u5206\u6790\u3001\u6a5f\u5668\u5b78\u7fd2\u4e5f\u80fd\u9032\u884c\u524d/\u5f8c\u7aef\u958b\u767c\u3002\u53e6\u5916 Python 
\u6709\u8c50\u5bcc\u7684\u8a0e\u8ad6\u793e\u7fa4\u4ee5\u53ca\u8a31\u591a\u958b\u6e90\u5957\u4ef6\u652f\u63f4\uff0c\u5927\u5e45\u7684\u964d\u4f4e\u5b78\u7fd2\u9580\u6abb\u3002 \u8aaa\u4e86\u9019\u9ebc\u591a\uff01\u5927\u5bb6\u6e96\u5099\u597d\u4e86\u55ce\uff1f\u5feb\u6e96\u5099\u597d\u96fb\u8166\u8207\u7b46\u8a18\u672c\uff0c\u597d\u597d\u7684\u70ba\u81ea\u5df1\u9032\u884c\u4e09\u5341\u5929\u7684\u5145\u96fb\u5427\uff5e Let's Go! \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 2] \u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#day-2-ai","text":"","title":"[Day 2] \u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#_1","text":"\u4eba\u5de5\u667a\u6167\u7684\u6f14\u9032 \u4eba\u5de5\u667a\u6167\u7684\u5206\u7d1a \u6a5f\u5668\u662f\u5982\u4f55\u5b78\u7fd2\u7684","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#_2","text":"AI \u8207\u6a5f\u5668\u5b78\u7fd2\u6280\u8853\u6b63\u5728\u84ec\u52c3\u767c\u5c55\u4e2d\uff0c\u4f60\u80fd\u60f3\u50cf\u4eba\u5de5\u667a\u6167\u66fe\u88ab\u8a8d\u70ba\u662f\u4e00\u500b\u6beb\u7121\u51fa\u8def\u7684\u9818\u57df\u55ce? 
\u5f9e\u4eba\u5de5\u667a\u6167\u7684\u6642\u9593\u8ef8\u4f86\u770b\u53ef\u4ee5\u5206\u70ba\u4e09\u500b\u71b1\u6f6e\u3002\u7b2c\u4e00\u6b21\u71b1\u6f6e\uff081950~1960\u5e74\uff09\uff0c\u7531\u65bc\u65e9\u671f\u7684\u96fb\u8166\u786c\u9ad4\u8cc7\u6e90\u7684\u4e0d\u8db3\u5c0e\u81f4\u8907\u96dc\u7684\u554f\u984c\u7121\u6cd5\u8f15\u6613\u7684\u89e3\u6c7a\u3002\u7b2c\u4e8c\u6b21\u71b1\u6f6e\uff081980~1990\u5e74\uff09\u5c07\u5e36\u6709\u77e5\u8b58\u672c\u9ad4\u7684\u4ee3\u7406\u4eba\u653e\u5165\u6a5f\u5668\u4eba\u4e2d\u4f7f\u5177\u6709\u667a\u6167\uff0c\u4e5f\u5c31\u662f\u6240\u8b02\u7684\u5c08\u5bb6\u7cfb\u7d71\u3002\u4f46\u4eba\u985e\u8cc7\u6e90\u6709\u9650\u4e0d\u53ef\u80fd\u628a\u6240\u6709\u7684\u77e5\u8b58\u90fd\u9010\u4e00\u5730\u8f38\u5165\u5230\u96fb\u8166\u3002\u56e0\u6b64\u5927\u5bb6\u958b\u59cb\u601d\u8003\u6a5f\u5668\u662f\u5426\u80fd\u5920\u8b93\u4ed6\u81ea\u5df1\u53bb\u5b78\u7fd2\uff1f\u800c\u4e0d\u662f\u4eba\u985e\u4e00\u5473\u7684\u9935\u5165\u9019\u4e9b\u77e5\u8b58\u3002\u7b2c\u4e09\u6b21\u71b1\u6f6e\uff082000\u5e74~\u73fe\u5728\uff09\u7531\u65bc CPU\u3001GPU \u4ee5\u53ca\u96f2\u7aef\u904b\u7b97\u8cc7\u6e90\u666e\u53ca\uff0c\u65e9\u671f\u8907\u96dc\u96e3\u89e3\u7684\u6f14\u7b97\u6cd5\u9678\u7e8c\u53ef\u4ee5\u900f\u904e\u8d85\u7d1a\u96fb\u8166\u4f86\u89e3\u6c7a\u3002\u7576\u624b\u908a\u6709\u4e86\u5927\u91cf\u7684\u6578\u64da\u5c31\u80fd\u62ff\u4f86\u6a5f\u5668\u5b78\u7fd2\uff0c\u56e0\u6b64\u5927\u5bb6\u8e0f\u5165\u4e86\u5927\u6578\u64da\u4ee5\u53ca\u6df1\u5ea6\u5b78\u7fd2\u7684\u6642\u4ee3\u3002\u6642\u9593\u4e0d\u65b7\u7684\u5f80\u524d\u8d70\uff0c\u4f60\u80fd\u60f3\u50cf\u672a\u4f86\u7684 AI 
\u5728\u4e16\u754c\u4e0a\u662f\u626e\u6f14\u4ec0\u9ebc\u6a23\u7684\u89d2\u8272\u55ce\uff1f","title":"\u4eba\u5de5\u667a\u6167\u7684\u6f14\u9032"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#_3","text":"\u73fe\u4eca\u4eba\u5de5\u667a\u6167\u8207\u6211\u5011\u751f\u6d3b\u7121\u6240\u4e0d\u5728\uff0c\u4f8b\u5982\u6211\u5011\u53ea\u8981\u5c0d\u8457\u624b\u6a5f\u558a\u4e00\u8072\u300cHey Siri !\u300d\u860b\u679c\u624b\u6a5f\u7684\u8a9e\u97f3\u52a9\u7406\u5c31\u80fd\u5e6b\u4f60\u6253\u7406\u597d\u5927\u5c0f\u4e8b\u3002\u6216\u8005\u6b63\u5728\u8d85\u5e02\u8cfc\u7269\u7684\u4f60\u6b63\u5728\u70ba\u8cfc\u8cb7\u54ea\u4e00\u9805\u5546\u54c1\u7169\u60f1\u6642\uff0c\u63a8\u85a6\u7cfb\u7d71\u6a5f\u5668\u4eba\u80fd\u5920\u5373\u6642\u5730\u70ba\u4f60\u505a\u5546\u54c1\u63a8\u85a6\u3002\u770b\u4f3c\u8457\u7c21\u55ae\u7684\u52d5\u4f5c\uff0c\u4f46\u4eba\u5de5\u667a\u6167\u7684\u60c5\u666f\u5728\u4f60\u6211\u65e5\u5e38\u751f\u6d3b\u4e2d\u606f\u606f\u76f8\u95dc\u3002\u4eba\u5de5\u667a\u6167\u4f9d\u7167\u6a5f\u5668\u80fd\u5920\u8655\u7406\u8207\u5224\u65b7\u7684\u80fd\u529b\u5340\u5206\u70ba\u56db\u500b\u5206\u7d1a\uff0c\u5206\u5225\u70ba\u81ea\u52d5\u63a7\u5236\u3001\u63a2\u7d22\u63a8\u8ad6\u3001\u6a5f\u5668\u5b78\u7fd2\u3001\u6df1\u5ea6\u5b78\u7fd2\uff1a","title":"\u4eba\u5de5\u667a\u6167\u7684\u5206\u7d1a"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#_4","text":"\u6a5f\u5668\u542b\u6709\u81ea\u52d5\u63a7\u5236\u7684\u529f\u80fd\uff0c\u4e26\u4e14\u7d93\u7531\u611f\u6e2c\u5668\u5075\u6e2c\u74b0\u5883\u7684\u8cc7\u8a0a\u3002\u4f8b\u5982\u900f\u904e\u6eab\u5ea6\u611f\u6e2c\u5668\u4f86\u5075\u6e2c\u7522\u7dda\u7684\u99ac\u9054\u662f\u5426\u904e\u71b1\uff0c\u4e26\u9054\u5230\u505c\u6b62\u904b\u8f49\u6548\u679c\u3002\u6216\u662f\u51b7\u6c23\u4f4e\u65bc20\u5ea6\u6642\u5c31\u9032\u5165\u5f85\u6a5f\u6a21\u5f0f\u2026\u2026\u7b49\u3002\u56e0\u6b64\u7a0b\u5f0f\u8a2d\u8a08\u5e2b\u5fc5\u9808\u5148\u628a\u6240\u6709\u53ef\u80fd\u7684\u60c5\u6cc1\u90fd\u8003\u616e\u
9032\u53bb\u624d\u80fd\u5beb\u51fa\u63a7\u5236\u7a0b\u5f0f\u3002\u9019\u5c31\u884d\u4f38\u51fa\u4e00\u4e9b\u554f\u984c\uff0c\u50cf\u662f\u9748\u6d3b\u5ea6\u4e0d\u9ad8\uff0c\u4e14\u9700\u8981\u6709\u7d93\u9a57\u7684\u5c08\u5bb6\u4ecb\u5165\u624d\u80fd\u5b8c\u6210\u3002","title":"\u7b2c\u4e00\u7d1a\u4eba\u5de5\u667a\u6167\uff1a\u81ea\u52d5\u63a7\u5236"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#_5","text":"\u7b2c\u4e8c\u7d1a\u9010\u6f38\u958b\u59cb\u5f37\u8abf\u908f\u8f2f\u63a8\u7406\uff0c\u53ef\u4ee5\u8aaa\u662f\u88dc\u8db3\u7b2c\u4e00\u7d1a\u7684\u554f\u984c\u3002\u900f\u904e\u5c07\u77e5\u8b58\u7d44\u7e54\u6210\u77e5\u8b58\u672c\u9ad4\u4e26\u8b93\u6a5f\u5668\u5f9e\u73fe\u6709\u7684\u8cc7\u8a0a\u4e2d\u53bb\u63a8\u7406\u3002\u5178\u578b\u7684\u4f8b\u5b50\u5c31\u662f\u5c08\u5bb6\u7cfb\u7d71\uff0c\u5b83\u662f\u900f\u904e\u7279\u5b9a\u9818\u57df\u7684\u5c08\u5bb6\u8a02\u5b9a\u51fa\u4e00\u5957\u77e5\u8b58\u5eab\u8207\u898f\u5247\u5eab\uff0c\u4e26\u7522\u751f\u5927\u91cf\u8f38\u5165\u8207\u8f38\u51fa\u8cc7\u6599\u7684\u6392\u5217\u7d44\u5408\u4f86\u89e3\u6c7a\u65e5\u5e38\u751f\u6d3b\u4e2d\u7684\u554f\u984c\u3002\u7576\u7136\u6240\u8b02\u7684\u5c08\u5bb6\u7cfb\u7d71\u5c31\u5fc5\u9808\u9080\u8acb\u9818\u57df\u7684\u5c08\u5bb6\u70ba\u7cfb\u7d71\u91cf\u8eab\u6253\u9020\u4e00\u5957\u7368\u4e00\u7121\u4e8c\u7684\u898f\u5247\u3002\u7136\u800c\u6bcf\u500b\u4eba\u7684\u89c0\u9ede\u53ef\u80fd\u90fd\u4e0d\u540c\uff0c\u56e0\u6b64\u4e0d\u540c\u5c08\u5bb6\u9593\u6240\u5236\u5b9a\u7684\u898f\u5247\u53ef\u80fd\u90fd\u4e0d\u592a\u4e00\u6a23\u3002","title":"\u7b2c\u4e8c\u7d1a\u4eba\u5de5\u667a\u6167\uff1a\u63a2\u7d22\u63a8\u8ad6"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#_6","text":"\u6a5f\u5668\u53ef\u4ee5\u6839\u64da\u8cc7\u6599\u5b78\u7fd2\u5982\u4f55\u5c07\u8f38\u5165\u8207\u8f38\u51fa\u8cc7\u6599\u7522\u751f\u95dc\u806f\u3002\u6a5f\u5668\u5b78\u7fd2\u662f\u4e00\u7a2e\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\uff0c\u4e26\u5f9e\u8cc7\u6599\u4e2d\u53bb\u5b
78\u7fd2\u4e26\u627e\u51fa\u554f\u984c\u7684\u89e3\u6c7a\u65b9\u6cd5\u3002\u5176\u61c9\u7528\u5305\u62ec\u641c\u5c0b\u5f15\u64ce\u3001\u5927\u6578\u64da\u5206\u6790\u7b49\u3002\u6211\u5011\u4f9d\u64da\u8cc7\u6599\u8207\u5b78\u7fd2\u65b9\u5f0f\u53ef\u5927\u81f4\u5206\u70ba\u76e3\u7763\u5f0f\u5b78\u7fd2\u3001\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u3001\u589e\u5f37\u5f0f\u5b78\u7fd2\uff0c\u6b64\u5916\u81ea\u76e3\u7763\u5b78\u7fd2\u9019\u500b\u540d\u8a5e\u6700\u8fd1\u4e5f\u71b1\u70c8\u7684\u8a0e\u8ad6\u4e2d\u3002","title":"\u7b2c\u4e09\u7d1a\u4eba\u5de5\u667a\u6167\uff1a\u6a5f\u5668\u5b78\u7fd2"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#_7","text":"\u6df1\u5ea6\u5b78\u7fd2\u662f\u4e00\u7a2e\u6a5f\u5668\u5b78\u7fd2\u7684\u65b9\u6cd5\u3002\u5b83\u85c9\u7531\u6a21\u4eff\u4eba\u985e\u5927\u8166\u795e\u7d93\u5143\u7684\u7d50\u69cb\uff0c\u5b9a\u7fa9\u89e3\u6c7a\u554f\u984c\u7684\u51fd\u5f0f\u3002\u6240\u8b02\u6df1\u5ea6\u5b78\u7fd2\u662f\u4e00\u7a2e\u5177\u6709\u6df1\u5ea6\u591a\u5c64\u7684\u795e\u7d93\u7db2\u8def\u3002\u6a5f\u5668\u53ef\u4ee5\u81ea\u884c\u5b78\u7fd2\u4e26\u4e14\u7406\u89e3\u6a5f\u5668\u5b78\u7fd2\u6642\u7528\u4ee5\u8868\u793a\u8cc7\u6599\u7684\u300c\u7279\u5fb5\u300d\uff0c\u56e0\u6b64\u53c8\u7a31\u70ba\u300c\u7279\u5fb5\u8868\u9054\u5b78\u7fd2\u300d\uff0c\u5176\u61c9\u7528\u5305\u62ec\uff1a\u5f71\u50cf\u5206\u985e\u3001\u6a5f\u5668\u7ffb\u8b6f...\u7b49\u3002","title":"\u7b2c\u56db\u7d1a\u4eba\u5de5\u667a\u6167\uff1a\u6df1\u5ea6\u5b78\u7fd2"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#_8","text":"","title":"\u6a5f\u5668\u5982\u4f55\u5b78\u7fd2\uff1f"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#supervised-learning","text":"\u6240\u8b02\u7684\u76e3\u7763\u5f0f\u5b78\u7fd2\u662f\u7d66\u8a31\u591a\u8cc7\u6599\u4e26\u7d66\u8207\u7b54\u6848\uff0c\u900f\u904e\u640d\u5931\u51fd\u6578\u8a08\u7b97\u4f86\u627e\u51fa\u4e00\u500b\u6700\u4f73\u89e3\u3002\u8209\u4e00\u500b\u7c21\u55ae\u7684\u4f8b\u5b50\uff0c\u6bd4\u5982\
u7d66\u6a5f\u5668\u5404\u770b\u4e86 1000 \u5f35\u8c93\u548c\u72d7\u7684\u7167\u7247\u5f8c\u518d\u8a62\u554f\u6a5f\u5668\u65b0\u7684\u4e00\u5f35\u7167\u7247\u4e2d\u662f\u8c93\u9084\u662f\u72d7\u3002\u4e00\u76f4\u4e0d\u65b7\u7684\u8fed\u4ee3\u8a13\u7df4\u4e26\u5f9e\u932f\u8aa4\u4e2d\u53bb\u5b78\u7fd2\uff0c\u6700\u7d42\u6a5f\u5668\u80fd\u6210\u529f\u7684\u5206\u985e\u4e86\u3002","title":"\u76e3\u7763\u5f0f\u5b78\u7fd2 (Supervised Learning)"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#unsupervised-learning","text":"\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u53ea\u7d66\u5b9a\u7279\u5fb5\uff0c\u6a5f\u5668\u6703\u60f3\u8fa6\u6cd5\u6703\u5f9e\u4e2d\u627e\u51fa\u898f\u5f8b\u3002\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u6700\u5e38\u898b\u7684\u65b9\u6cd5\u5c31\u662f\u96c6\u7fa4\u5206\u6790(Cluster Analysis)\uff0c\u76ee\u6a19\u662f\u6839\u64da\u7279\u5fb5\u5c07\u8cc7\u6599\u6a23\u672c\u5206\u70ba\u5e7e\u7fa4\u3002\u7c21\u55ae\u4f86\u8aaa\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u5c31\u662f\u7d66\u8a31\u591a\u8cc7\u6599\u4f46\u4e0d\u7d66\u4e88\u7b54\u6848\uff0c\u6a21\u578b\u6703\u5f9e\u8cc7\u6599\u4e2d\u81ea\u5df1\u53bb\u627e\u51fa\u95dc\u4fc2\u3002\u900f\u904e\u5206\u7fa4\u6f14\u7b97\u6cd5\u4f86\u8a08\u7b97\u8cc7\u6599\u8207\u8cc7\u6599\u9593\u7684\u76f8\u4f3c\u7a0b\u5ea6\u8207\u8ddd\u96e2\u3002","title":"\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2 (Unsupervised 
Learning)"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#semi-supervised-learning","text":"\u4ecb\u65bc\u76e3\u7763\u5f0f\u5b78\u7fd2\u8207\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u4e4b\u9593\u3002\u5728\u73fe\u5be6\u751f\u6d3b\u4e2d\uff0c\u672a\u6a19\u8a18\u6a23\u672c\u591a\u3001\u6709\u6a19\u8a18\u6a23\u672c\u5c11\u662f\u4e00\u500b\u6bd4\u8f03\u666e\u904d\u73fe\u8c61\uff0c\u5982\u4f55\u5229\u7528\u597d\u672a\u6a19\u8a18\u6a23\u672c\u4f86\u63d0\u5347\u6a21\u578b\u6cdb\u5316\u80fd\u529b\uff0c\u5c31\u662f\u534a\u76e3\u7763\u5f0f\u5b78\u7fd2\u7814\u7a76\u7684\u91cd\u9ede\u3002\u534a\u76e3\u7763\u5f0f\u5b78\u7fd2\u7684\u61c9\u7528\u4e3b\u8981\u5728\u65bc\u6536\u96c6\u8cc7\u6599\u5f88\u7c21\u55ae\uff0c\u4f46\u6a19\u8a18\u7684\u8cc7\u6599\u592a\u5c11\u4e86\uff0c\u6211\u5011\u5e0c\u671b\u53ef\u4ee5\u81ea\u52d5\u6a19\u8a18\u8cc7\u6599\u3002","title":"\u534a\u76e3\u7763\u5f0f\u5b78\u7fd2 (Semi-Supervised Learning)"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#reinforcement-learning","text":"\u5728\u5f37\u5316\u5f0f\u5b78\u7fd2\u4e2d\u6a5f\u5668\u6703\u9032\u884c\u4e00\u7cfb\u5217\u7684\u52d5\u4f5c\uff0c\u800c\u6bcf\u505a\u4e00\u500b\u52d5\u4f5c\u3001\u74b0\u5883\u90fd\u6703\u8ddf\u8457\u767c\u751f\u8b8a\u5316\u3002\u82e5\u74b0\u5883\u7684\u8b8a\u5316\u662f\u96e2\u76ee\u6a19\u66f4\u63a5\u8fd1\uff0c\u6211\u5011\u5c31\u6703\u7d66\u4e88\u4e00\u500b\u6b63\u5411\u53cd\u994b\u3002\u82e5\u96e2\u76ee\u6a19\u66f4\u9060\uff0c\u5247\u7d66\u4e88\u8ca0\u5411\u53cd\u994b\u3002\u6a5f\u5668\u900f\u904e\u4e0d\u65b7\u7684\u5f9e\u932f\u8aa4\u4e2d\u53bb\u5b78\u7fd2\uff0c\u6700\u7d42\u5b78\u5230\u4e86\u5982\u4f55\u53bb\u89e3\u6c7a\u4e00\u4ef6\u4e8b\u60c5\u3002","title":"\u5f37\u5316\u5f0f\u5b78\u7fd2 (Reinforcement Learning)"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#self-supervised-learning","text":"\u81ea\u76e3\u7763\u5b78\u7fd2\u662f\u7531\u5377\u7a4d\u795e\u7d93\u4e4b\u7236 Yann LeCun \u65bc 2019 
\u5e74\u6240\u63d0\u51fa\u4f86\u7684\u4e00\u7a2e\u5b78\u7fd2\u6a5f\u5236\u3002\u6b64\u5b78\u7fd2\u6a5f\u5236\u6a21\u4eff\u4eba\u985e\u7684\u5b78\u7fd2\u884c\u70ba\uff0c\u900f\u904e\u7576\u524d\u4efb\u52d9\u89c0\u5bdf\u6240\u5f97\u5230\u7684\u7279\u5fb5\uff0c\u4e26\u8a13\u7df4\u4e00\u500b\u76ee\u6a19\u4efb\u52d9\u7684\u6a21\u578b\u3002\u800c\u4e14\u5b78\u7fd2\u904e\u7a0b\u4e2d\u4e26\u4e0d\u4ef0\u8cf4\u4eba\u985e\u7d66\u5b9a\u7684\u6a19\u7c64\u3002\u7c21\u55ae\u4f86\u8aaa\u8a13\u7df4\u904e\u7a0b\u662f\u62ff\u4e00\u500b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u900f\u904e\u975e\u76e3\u7763\u5f0f\u6280\u5de7 pre-text task \u8a13\u7df4\u597d\u6a21\u578b\uff0c\u8a13\u7df4\u5b8c\u6210\u5f8c\u518d\u63a5\u5230\u4e0b\u6e38\u4efb\u52d9\u505a\u6700\u5f8c\u7684\u6a21\u578b\u5fae\u8abf (fine tune)\u3002","title":"\u81ea\u76e3\u7763\u5b78\u7fd2 (Self-Supervised Learning)"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#ai","text":"Python \u662f\u8fd1\u5e74\u4f86\u9ad8\u901f\u6210\u9577\u4e26\u4e14\u9010\u6f38\u666e\u53ca\u7684\u7a0b\u5f0f\u8a9e\u8a00\uff0c\u4e5f\u53ef\u4ee5\u8aaa\u662f\u6700\u5bb9\u6613\u4e0a\u624b\u7684\u7a0b\u5f0f\u8a9e\u8a00\u4e4b\u4e00\u3002\u4e3b\u8981\u5728\u65bc\u5b83\u7684\u8a9e\u6cd5\u662f\u7c21\u5316\u800c\u4e0d\u8907\u96dc\u7684\uff0c\u540c\u6642\u5f37\u8abf\u7a0b\u5f0f\u78bc\u7684\u53ef\u8b80\u6027\u56e0\u6b64\u66f4\u80fd\u8cbc\u8fd1\u7a0b\u5f0f\u8a2d\u8a08\u8005\u7684\u601d\u7dad\u3002\u7576\u7136\u4e5f\u6709\u4e9b\u4eba\u4f7f\u7528 R \u8a9e\u8a00\u9032\u884c\u7d71\u8a08\u5206\u6790\u3001\u7e6a\u5716\u4ee5\u53ca\u8cc7\u6599\u63a2\u52d8\u751a\u81f3\u5efa\u6a21\u3002\u5982\u679c\u4f60\u6b63\u5728\u7336\u8c6b\u8981\u5165\u5751\u54ea\u4e00\u985e\u7a0b\u5f0f\u8a9e\u8a00\uff0c\u7b46\u8005\u9019\u88e1\u63a8\u85a6 Python \u7a0b\u5f0f\u8a9e\u8a00\u3002\u56e0\u70ba Python \u4e0d\u50c5\u53ef\u4ee5\u9032\u884c\u8cc7\u6599\u5206\u6790\u3001\u6a5f\u5668\u5b78\u7fd2\u4e5f\u80fd\u9032\u884c\u524d/\u5f8c\u7aef\u958b\u767c\u3002\u53e6\u5916 Python 
\u6709\u8c50\u5bcc\u7684\u8a0e\u8ad6\u793e\u7fa4\u4ee5\u53ca\u8a31\u591a\u958b\u6e90\u5957\u4ef6\u652f\u63f4\uff0c\u5927\u5e45\u7684\u964d\u4f4e\u5b78\u7fd2\u9580\u6abb\u3002 \u8aaa\u4e86\u9019\u9ebc\u591a\uff01\u5927\u5bb6\u6e96\u5099\u597d\u4e86\u55ce\uff1f\u5feb\u6e96\u5099\u597d\u96fb\u8166\u8207\u7b46\u8a18\u672c\uff0c\u597d\u597d\u7684\u70ba\u81ea\u5df1\u9032\u884c\u4e09\u5341\u5929\u7684\u5145\u96fb\u5427\uff5e Let's Go! \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u5b78 AI \u8a72\u7528\u54ea\u7a2e\u7a0b\u5f0f\u8a9e\u8a00\uff1f"},{"location":"20.Auto-Sklearn/","text":"[Day 20] \u6a5f\u5668\u5b78\u7fd2\u91d1\u624b\u6307 - Auto-sklearn \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u4e86\u89e3 Auto-sklearn \u904b\u4f5c\u539f\u7406 Meta Learning Bayesian Optimization Build Ensemble \u5be6\u4f5c Auto-sklearn \u63a1\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u8a13\u7df4\uff0c\u4e26\u6bd4\u8f03\u5169\u7a2e\u4e0d\u540c\u7248\u672c\u7684 Auto-sklearn\u3002 \u4f7f\u7528 pipelineprofiler \u8996\u89ba\u5316 AutoML \u6a21\u578b\u3002 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 Auto-sklearn \u63a1\u7528\u5143\u5b78\u7fd2 (Meta Learning) \u9078\u64c7\u6a21\u578b\u548c\u8d85\u53c3\u6578\u512a\u5316\u7684\u65b9\u6cd5\u4f5c\u70ba\u641c\u5c0b\u6700\u4f73\u6a21\u578b\u7684\u91cd\u9ede\u3002\u6b64 AutoML \u5957\u4ef6\u4e3b\u8981\u662f\u641c\u5c0b\u6240\u6709 Sklearn \u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4ee5\u6a21\u578b\u7684\u8d85\u53c3\u6578\uff0c\u4e26\u4f7f\u7528\u8c9d\u8449\u65af\u512a\u5316 (Bayesian Optimization) \u8207\u81ea\u52d5\u6574\u5408 (Ensemble Selection) \u7684\u67b6\u69cb\u5728\u6709\u9650\u6642\u9593\u5167\u641c\u5c0b\u6700\u4f73\u7684\u6a21\u578b\u3002\u7b2c\u4e00\u7248\u7684 Auto-sklearn \u65bc 2015 \u5e74\u767c\u8868\u5728 NIPS(Neural Information Processing Systems) \u6703\u8b70\u4e0a\uff0c\u8ad6\u6587\u540d\u7a31\u70ba Efficient and Robust Automated Machine Learning 
\u3002\u6709\u5225\u65bc\u5176\u4ed6\u7684 AutoML \u65b9\u6cd5\uff0cAuto-sklearn \u63d0\u51fa\u4e86\u5143\u5b78\u7fd2\u67b6\u69cb\u6539\u5584\u4e86\u8c9d\u8449\u65af\u512a\u5316\u5728\u521d\u59cb\u51b7\u555f\u52d5\u7684\u7f3a\u9ede\uff0c\u4e26\u63d0\u4f9b\u4e00\u500b\u597d\u7684\u63a1\u6a23\u65b9\u5411\u66f4\u5feb\u901f\u5c0b\u627e\u6700\u4f73\u7684\u6a21\u578b[1]\u3002\u7b2c\u4e8c\u500b\u7248\u672c\u65bc 2020 \u5e74\u767c\u5e03\uff0c\u8ad6\u6587\u540d\u7a31\u70ba Auto-Sklearn 2.0: Hands-free AutoML via Meta-Learning \u3002\u5728\u65b0\u7684\u7248\u672c\u4e2d\u4fee\u6539\u4e86\u5143\u5b78\u7fd2\u67b6\u69cb\uff0c\u4e26\u4e0d\u4f9d\u8cf4\u5143\u7279\u5fb5\u4f86\u9078\u64c7\u6a21\u578b\u9078\u64c7\u8207\u8abf\u53c3\u7b56\u7565\u3002\u800c\u662f\u5f15\u5165\u4e86\u4e00\u500b\u5143\u5b78\u7fd2\u7b56\u7565\u9078\u64c7\u5668\uff0c\u6839\u64da\u8cc7\u6599\u96c6\u4e2d\u7684\u6a23\u672c\u6578\u91cf\u548c\u7279\u5fb5\uff0c\u8a02\u5b9a\u4e86\u4e00\u500b\u6a21\u578b\u9078\u64c7\u7684\u7b56\u7565[3]\u3002 AutoML \u8996\u70ba CASH \u554f\u984c \u5728\u8ad6\u6587\u4e2d\u4f5c\u8005\u5c07 AutoML \u8996\u70ba\u6f14\u7b97\u6cd5\u9078\u64c7\u548c\u8d85\u53c3\u6578\u512a\u5316 (Combined Algorithm Selection and Hyperparameter, CASH) \u7684\u7d44\u5408\u6700\u4f73\u5316\u554f\u984c\u3002\u56e0\u70ba\u5728 AutoML 
\u9818\u57df\u7576\u4e2d\u5c07\u6703\u9762\u81e8\u5169\u500b\u554f\u984c\u3002\u7b2c\u4e00\u500b\u662f\u6c92\u6709\u4efb\u4f55\u7684\u6f14\u7b97\u6cd5\u6a21\u578b\u662f\u53ef\u4ee5\u4fdd\u8b49\u5728\u6240\u6709\u7684\u8cc7\u6599\u96c6\u4e2d\u8868\u73fe\u6700\u597d\uff0c\u56e0\u6b64\u6311\u9078\u4e00\u500b\u597d\u7684\u6f14\u7b97\u6cd5\u662f\u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2\u7684\u9996\u8981\u4efb\u52d9\u3002\u7b2c\u4e8c\u8a31\u591a\u7684\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u5f80\u5f80\u4f9d\u8cf4\u65bc\u8d85\u53c3\u6578\uff0c\u900f\u904e\u4e0d\u540c\u7684\u8d85\u53c3\u6578\u8a2d\u5b9a\u53ef\u4ee5\u53d6\u5f97\u66f4\u597d\u7684\u5b78\u7fd2\u7d50\u679c\u3002\u4f8b\u5982\u5728 SVM \u65b9\u6cd5\u4e2d\u6211\u5011\u53ef\u4ee5\u8a2d\u5b9a\u4e0d\u540c\u7684\u6838\u6280\u5de7\u8b93\u6a21\u578b\u5177\u6709\u975e\u7dda\u6027\u7684\u80fd\u529b\uff0c\u6216\u662f\u900f\u904e\u8d85\u53c3\u6578 C \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002\u7136\u800c\u8c9d\u8449\u65af\u512a\u5316\u5982\u4eca\u6210\u70ba AutoML \u8d85\u53c3\u6578\u641c\u5c0b\u7684\u91cd\u8981\u6838\u5fc3\u65b9\u6cd5\u3002 Auto-sklearn \u67b6\u69cb Auto-sklearn \u53ef\u4ee5\u88ab\u62ff\u4f86\u8655\u7406\u8ff4\u6b78\u548c\u5206\u985e\u7684\u554f\u984c\u3002\u4e0b\u5716\u70ba\u7b2c\u4e00\u7248\u8ad6\u6587\u4e2d\u6240\u7e6a\u88fd\u7684\u67b6\u69cb\u5716\u3002\u6211\u5011\u53ef\u4ee5\u5c07 Auto-sklearn 
\u5207\u6210\u4e09\u500b\u90e8\u5206\uff0c\u5176\u4e2d\u7b2c\u4e00\u500b\u662f\u5f15\u5165\u5143\u5b78\u7fd2\u6a5f\u5236\u4f86\u6a21\u4eff\u5c08\u5bb6\u5728\u8655\u7406\u6a5f\u5668\u5b78\u7fd2\u7684\u5148\u9a57\u77e5\u8b58\u3002\u4e26\u63a1\u7528\u5143\u7279\u5fb5\u8b93\u6211\u5011\u66f4\u6709\u6548\u7387\u7684\u53bb\u6c7a\u5b9a\u5728\u65b0\u7684\u8cc7\u6599\u96c6\u4e2d\u8a72\u6311\u9078\u54ea\u4e00\u7a2e\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u3002\u63a5\u8457\u6311\u597d\u6a21\u578b\u5f8c\u4e26\u900f\u904e\u8c9d\u8449\u65af\u512a\u5316\u4f86\u6311\u9078\u5408\u9069\u7684\u6a21\u578b\u8d85\u53c3\u6578\uff0c\u4ee5\u53ca\u5617\u8a66\u4e00\u4e9b\u8cc7\u6599\u524d\u8655\u7406\u8207\u7279\u5fb5\u5de5\u7a0b\u3002\u6700\u5f8c\u6311\u9078\u5e7e\u500b\u4e0d\u932f\u7684\u6a21\u578b\u4e26\u900f\u904e\u6574\u9ad4\u5b78\u7fd2\u7684\u6280\u5de7\u9032\u884c\u6a21\u578b\u5806\u758a\uff0c\u5c07\u8868\u73fe\u4e0d\u932f\u7684\u6a21\u578b\u8f38\u51fa\u7d50\u679c\u505a\u4e00\u500b\u52a0\u6b0a\u548c\u6216\u662f\u6295\u7968\u3002 Meta Learning Bayesian Optimization Build Ensemble Meta Learning \u7576\u6211\u5011\u60f3\u5c0d\u65b0\u8cc7\u6599\u96c6\u505a\u5206\u985e\u6216\u8ff4\u6b78\u6642\uff0cAuto-sklearn \u6703\u5148\u63d0\u53d6\u5143\u7279\u5fb5\uff0c\u5177\u6709\u76f8\u4f3c\u5143\u7279\u5fb5\u7684\u8cc7\u6599\u96c6\u5728\u540c\u4e00\u7d44\u8d85\u53c3\u6578\u61c9\u8a72\u6703\u6709\u76f8\u4f3c\u7684\u8868\u73fe\u3002\u56e0\u6b64\u900f\u904e\u5143\u7279\u5fb5\u53ef\u4ee5\u6709\u6548\u5730\u8a55\u4f30\u5728\u65b0\u8cc7\u6599\u96c6\u4e0a\u61c9\u8a72\u4f7f\u7528\u54ea\u7a2e\u7b97\u6cd5\u3002\u5143\u5b78\u7fd2\u5728\u9019\u88e1\u7684\u76ee\u7684\u662f\u70ba\u4e86\u8981\u627e\u4e00\u500b\u4e0d\u932f\u7684\u8d85\u53c3\u6578\u505a\u521d\u59cb\u5316\uff0c\u4f7f\u5176\u5728\u4e00\u958b\u59cb\u7684\u8868\u73fe\u512a\u65bc\u96a8\u6a5f\u7684\u65b9\u6cd5\u3002\u4e26\u63d0\u4f9b\u8c9d\u8449\u65af\u512a\u5316\u6709\u500b\u660e\u78ba\u7684\u65b9\u5411\u3002Auto-sklearn \u53c3\u8003\u4e86 OpenML 140 
\u500b\u8cc7\u6599\u96c6\uff0c\u4e26\u5f59\u6574\u4e86 38 \u500b\u5143\u7279\u5fb5\uff0c\u4f8b\u5982\uff1a\u504f\u5ea6\u3001\u5cf0\u5ea6\u3001\u7279\u5fb5\u6578\u91cf\u3001\u985e\u5225\u6578\u91cf......\u7b49\u3002\u9996\u5148\u70ba\u9019 140 \u500b\u8cc7\u6599\u96c6\u4f7f\u7528\u8c9d\u8449\u65af\u512a\u5316\u9032\u884c\u6a21\u578b\u8a13\u7df4\uff0c\u4e26\u5c07\u9019\u4e9b\u8cc7\u6599\u96c6\u5c0d\u61c9\u7684\u6a21\u578b\u8207\u6700\u4f73\u7684\u8d85\u53c3\u6578\u5132\u5b58\u8d77\u4f86\u3002\u7576\u6709\u65b0\u7684\u8cc7\u6599\u96c6\u9032\u4f86\u6642\u6703\u5148\u900f\u904e\u5143\u7279\u5fb5\u9032\u884c\u76f8\u4f3c\u5ea6\u5339\u914d\uff0c\u4e26\u5c07\u5339\u914d\u7a0b\u5ea6\u6700\u9ad8\u7684\u524d k \u500b\u8cc7\u6599\u96c6 (\u9810\u8a2dk=25) \u6240\u5c0d\u61c9\u7684\u6a21\u578b\u548c\u8d85\u53c3\u6578\u4f5c\u70ba\u8c9d\u8449\u65af\u512a\u5316\u7684\u521d\u59cb\u8a2d\u5b9a\u3002 Bayesian Optimization \u5728\u8c9d\u8449\u65af\u512a\u5316\u7576\u4e2d\u4e3b\u8981\u6703\u5c0b\u627e\u8a72\u8cc7\u6599\u96c6\u4e2d\u6700\u5408\u9069\u7684\u8cc7\u6599\u524d\u8655\u7406 (data pre-processors)\u3001\u7279\u5fb5\u524d\u8655\u7406 (feature pre-processors) \u8207\u5206\u985e/\u8ff4\u6b78\u6a21\u578b\u3002\u4ee5\u4e0a\u4e09\u5927\u985e\u5408\u8a08\u5171\u6709 110 \u500b\u8d85\u53c3\u6578\u5fc5\u9808\u900f\u904e\u8c9d\u8449\u65af\u512a\u5316\u4f86\u5c0b\u627e\u6700\u9069\u5408\u7684\u53c3\u6578\u7d44\u5408\u3002\u5176\u8c9d\u8449\u65af\u512a\u5316\u4e3b\u8981\u65b9\u6cd5\u662f\u900f\u904e\u5efa\u7acb\u76ee\u6a19\u51fd\u6578\u7684\u6a5f\u7387\u6a21\u578b\uff0c\u4e26\u7528\u5b83\u4f86\u9078\u64c7\u6700\u6709\u5e0c\u671b\u7684\u8d85\u53c3\u6578\u4f86\u8a55\u4f30\u771f\u5be6\u7684\u76ee\u6a19\u51fd\u6578\u3002 \u4ee5\u4e0b\u5167\u5bb9\u6458\u9304\u81ea Auto-sklearn v1.0 \u8ad6\u6587\u63d0\u4f9b\u7684\u5167\u5bb9 [1][2] Data Pre-processors \u5728\u8cc7\u6599\u524d\u8655\u7406\u90e8\u5206 Auto-sklearn 
\u63d0\u4f9b\u4e86\u56db\u7a2e\u65b9\u6cd5\u3002\u5305\u542b\u7279\u5fb5\u7e2e\u653e\u3001\u586b\u88dc\u7f3a\u5931\u503c\u3001\u985e\u5225\u7279\u5fb5\u9032\u884c one-hot encoding \u8207\u8655\u7406\u76ee\u6a19\u8f38\u51fa\u985e\u5225\u6578\u91cf\u4e0d\u5e73\u8861\u554f\u984c\u3002 Data Pre-processors \u7279\u5fb5\u7e2e\u653e \u586b\u88dc\u7f3a\u5931\u503c one-hot encoding \u985e\u5225\u8cc7\u6599\u4e0d\u5e73\u8861 \u5728\u65b0\u7684\u7248\u672c\u4e2d\u591a\u4e86\u4e00\u4e9b\u8cc7\u6599\u524d\u8655\u7406\u65b9\u6cd5\uff0c\u8a73\u7d30\u53ef\u4ee5\u53c3\u8003 Auto-sklearn data_preprocessing \u7684\u539f\u59cb\u7a0b\u5f0f\u3002 Feature Pre-processors \u5728\u7279\u5fb5\u524d\u8655\u7406\u90e8\u5206 Auto-sklearn \u63d0\u4f9b\u4e86 12 \u7a2e\u7279\u5fb5\u8655\u7406\u7684\u6280\u5de7\uff0c\u7136\u800c\u5728\u773e\u591a\u65b9\u6cd5\u4e2d\u50c5\u6703\u6311\u9078\u5176\u4e2d\u4e00\u7a2e\u3002 \u8a73\u7d30\u53ef\u4ee5\u53c3\u8003 Auto-sklearn feature_preprocessing \u7684\u539f\u59cb\u7a0b\u5f0f\u3002 Build Ensemble \u5728 Auto-sklearn \u8a13\u7df4\u968e\u6bb5\u6703\u7522\u751f\u8a31\u591a\u8868\u73fe\u512a\u826f\u7684\u6a21\u578b\uff0c\u6700\u7d42\u900f\u904e\u8caa\u5a6a\u6cd5\u7684 Bagging Ensemble Selection 
\u65b9\u6cd5\u4f86\u5408\u4f75\u591a\u500b\u6a21\u578b\u7d44\u5408\u6210\u4e00\u500b\u66f4\u5f37\u66f4\u5927\u7684\u6a21\u578b\uff0c\u4e26\u63d0\u9ad8\u9810\u6e2c\u7684\u6e96\u78ba\u6027\u3002\u4e0b\u5716\u70ba\u7b2c\u4e00\u7248\u8ad6\u6587\u4e2d\u9032\u884c\u7684\u5be6\u9a57\uff0c\u5176\u4e2d\u6a6b\u8ef8\u70ba\u7a0b\u5f0f\u57f7\u884c\u6642\u9593\uff0c\u7e31\u8ef8\u70ba\u5728\u6642\u9593\u5167\u641c\u5c0b\u5230\u7684\u6700\u4f73\u6a21\u578b\u7684\u6392\u540d\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7da0\u8272\u7dda\u689d\u5728\u52a0\u5165\u4e86\u6574\u9ad4\u5b78\u7fd2\u6a5f\u5236\u8868\u73fe\u6548\u679c\u6bd4\u5c1a\u672a\u52a0\u5165\u7684\u85cd\u8272\u7dda\u689d\u5be6\u9a57\u4f86\u5f97\u597d\u3002\u4e26\u4e14\u5728\u77ed\u6642\u9593\u5167\u5c31\u53ef\u4ee5\u5f97\u5230\u4e0d\u932f\u7684\u7d50\u679c\u3002 \u5b89\u88dd Auto-sklearn \u76ee\u524d Auto-sklearn \u50c5\u652f\u63f4 Linux \u7cfb\u7d71\u3002\u82e5\u6c92\u6709\u6b64\u7cfb\u7d71\u7684\u8b80\u8005\u53ef\u4ee5\u900f\u904e Colab \u9ad4\u9a57\u3002\u53e6\u5916\u82e5\u5b89\u88dd\u904e\u7a0b\u4e2d\u51fa\u73fe\u932f\u8aa4\uff0c\u5fc5\u9808\u5148\u78ba\u8a8d swig \u662f\u5426\u5df2\u5b8c\u6210\u5b89\u88dd\u3002 pip install auto-sklearn \u82e5\u4f7f\u7528 Colab \u57f7\u884c\uff0c\u5b89\u88dd\u5b8c\u6210\u5f8c\u9ede\u9078\u4e0a\u65b9\u5de5\u5177\u5217 Runtime -> Restart runtime \u91cd\u555f\u624d\u80fd\u6b63\u5e38\u57f7\u884c\u6b64\u5957\u4ef6\u3002 \u8f09\u5165\u8cc7\u6599\u96c6 \u672c\u6b21\u7bc4\u4f8b\u6cbf\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\uff0c\u4e26\u4f7f\u7528 Auto-sklearn \u4f86\u641c\u5c0b\u6700\u4f73\u7684\u5206\u985e\u5668\u6a21\u578b\u3002\u6b64\u5916\u5927\u5bb6\u53ef\u4ee5\u8a66\u8457\u89c0\u5bdf Auto-sklearn \u627e\u5230\u7684\u6700\u4f73\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u4e0a\u7684\u8868\u73fe\uff0c\u4e26\u8207\u524d\u5e7e\u5929\u6240\u4ecb\u7d39\u7684\u90a3\u4e9b\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4f86\u505a\u6bd4\u8f03\u3002 import pandas as pd import 
numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris iris = load_iris () df_data = pd . DataFrame ( data = np . c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data \u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6 \u6211\u5011\u6309\u7167\u82b1\u6735\u7a2e\u985e\u7684\u6578\u91cf\u5c0d\u8cc7\u6599\u96c6\u4ee5 7:3 \u7684\u6bd4\u4f8b\u5207\u5272\u51fa\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u3002\u5176\u4e2d\u53c3\u6578 stratify=y \u8a2d\u5b9a\u662f\u78ba\u4fdd\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u5c0d\u65bc\u4e09\u7a2e\u82b1\u6735\u985e\u5225\u7684\u6bd4\u4f8b\u5728\u9019\u5169\u500b\u5207\u51fa\u4f86\u7684\u8cc7\u6599\u96c6\u4e2d\u6bd4\u4f8b\u8981\u4e00\u6a23\uff0c\u4ee5\u514d\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u6709\u5f88\u5927\u7684\u504f\u5dee\u3002 from sklearn.model_selection import train_test_split X = df_data . drop ( labels = [ 'Species' ], axis = 1 ) . values # \u79fb\u9664Species\u4e26\u53d6\u5f97\u5269\u4e0b\u6b04\u4f4d\u8cc7\u6599 y = df_data [ 'Species' ] . values X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.3 , random_state = 42 , stratify = y ) print ( 'train shape:' , X_train . shape ) print ( 'test shape:' , X_test . 
shape ) \u8f38\u51fa\u7d50\u679c\uff1a train shape: (105, 4) test shape: (45, 4) Auto-sklearn \u4ee5\u4e0b\u662f\u6a21\u578b\u5e38\u7528\u7684\u8d85\u53c3\u6578\u4ee5\u53ca\u65b9\u6cd5\uff0c\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u5b98\u65b9 API \u6587\u4ef6 \u3002 Parameters: - time_left_for_this_task: \u641c\u5c0b\u6642\u9593(\u79d2)\uff0c\u9810\u8a2d3600\u79d2(60\u5206\u9418)\u3002 - per_run_time_limit: \u6bcf\u500b\u6a21\u578b\u8a13\u7df4\u7684\u4e0a\u9650\u6642\u9593\uff0c\u9810\u8a2d\u70batime_left_for_this_task\u76841/10\u3002 - ensemble_size: \u6a21\u578b\u8f38\u51fa\u6578\u91cf\uff0c\u9810\u8a2d50\u3002 - resampling_strategy: \u8cc7\u6599\u63a1\u6a23\u65b9\u5f0f\u3002\u70ba\u4e86\u907f\u514d\u904e\u64ec\u5408\uff0c\u53ef\u4ee5\u63a1\u7528\u4ea4\u53c9\u9a57\u8b49\u6a5f\u5236\u3002\u9810\u8a2d\u65b9\u6cd5\u70ba\u6700\u57fa\u672c\u7684 holdout\u3002 Attributes: - cv_results_: \u67e5\u8a62\u6a21\u578b\u641c\u5c0b\u7d50\u679c\u4ee5\u53ca\u6bcf\u500b\u6700\u4f73\u6a21\u578b\u7684\u8d85\u53c3\u6578\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - refit: \u4f7f\u7528 fit \u5c0b\u627e\u597d\u7684\u53c3\u6578\u5f8c\uff0c\u518d\u4f7f\u7528\u6240\u6709\u7684\u8cc7\u6599\u9032\u884c\u6700\u5f8c\u5fae\u8abf\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 - leaderboard: \u986f\u793a k \u500b ensemble \u6a21\u578b\u4e26\u6392\u540d\u3002 \u9996\u5148\u6211\u5011\u4f86\u6e2c\u8a66\u7b2c\u4e00\u7248\u7684 Auto-sklearn\uff0c\u5efa\u7acb\u4e00\u500b\u5206\u985e\u5668\u985e\u578b\u7684\u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u4e26\u8a2d\u5b9a\u76f8\u95dc\u7684\u57f7\u884c\u53c3\u6578\u3002\u5728\u672c\u6b21\u5be6\u9a57\u4e2d\u6211\u5011\u8a2d\u5b9a\u6a21\u578b\u641c\u5c0b\u7e3d\u6642\u9593\u70ba 180 
\u79d2\uff0c\u6bcf\u500b\u6a21\u578b\u8a13\u7df4\u6642\u9593\u9650\u5236 40 \u79d2\u5167\u3002\u6b64\u5916\u8a2d\u5b9a resampling_strategy='cv' \u5373 K-Fold \u4ea4\u53c9\u9a57\u8b49\u3002\u6b64\u5916\u5fc5\u9808\u53e6\u5916\u8a2d\u5b9a resampling_strategy_arguments \u4e26\u7d66\u4e88 k=5\uff0c\u8a13\u7df4\u96c6\u5207\u5272\u70ba\u4e94\u7b49\u4efd\u3002\u9019\u610f\u5473\u8457\u76f8\u540c\u7684\u6a21\u578b\u8981\u8a13\u7df4\u4e94\u6b21\uff0c\u6bcf\u4e00\u6b21\u7684\u8a13\u7df4\u90fd\u6703\u5f9e\u9019\u4e94\u7b49\u4efd\u6311\u9078\u5176\u4e2d\u56db\u7b49\u4efd\u4f5c\u70ba\u8a13\u7df4\u8cc7\u6599\uff0c\u5269\u4e0b\u4e00\u7b49\u4efd\u672a\u53c3\u8207\u8a13\u7df4\u4e26\u4f5c\u70ba\u9a57\u8b49\u96c6\u3002 import autosklearn.classification automlclassifierV1 = autosklearn . classification . AutoSklearnClassifier ( time_left_for_this_task = 180 , per_run_time_limit = 40 , resampling_strategy = 'cv' , resampling_strategy_arguments = { 'folds' : 5 } ) automlclassifierV1 . fit ( X_train , y_train ) \u8a13\u7df4\u7d50\u675f\u5f8c\u6211\u5011\u53ef\u4ee5\u4f86\u67e5\u770b\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u8868\u73fe\u3002\u5927\u5bb6\u53ef\u4ee5\u8a66\u8457\u8abf\u6574\u6a21\u578b\u8a13\u7df4\u6642\u9593\u4ee5\u53ca\u4e00\u4e9b\u63a7\u5236\u53c3\u6578\uff0c\u67e5\u770b\u662f\u5426\u6709\u6c92\u6709\u5e6b\u52a9\u6a21\u578b\u6e96\u78ba\u5ea6\u63d0\u5347\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( 'automlclassifierV1 \u8a13\u7df4\u96c6: ' , automlclassifierV1 . score ( X_train , y_train )) print ( 'automlclassifierV1 \u6e2c\u8a66\u96c6: ' , automlclassifierV1 . 
score ( X_test , y_test )) \u8f38\u51fa\u7d50\u679c\uff1a automlclassifierV1 \u8a13\u7df4\u96c6: 0.9904761904761905 automlclassifierV1 \u6e2c\u8a66\u96c6: 0.9111111111111111 \u4f7f\u7528 Auto-sklearn 2.0 \u5728\u7b2c\u4e8c\u7248\u7684 Auto-sklearn \u5c0d\u6a21\u578b\u641c\u5c0b\u9032\u884c\u4e86\u4e00\u4e9b\u512a\u5316\uff0c\u4e26\u4e14\u53ef\u4ee5\u81ea\u52d5\u641c\u5c0b\u597d\u7684\u8cc7\u6599\u63a1\u6a23\u65b9\u5f0f\u3002\u56e0\u6b64\u6211\u5011\u4e0d\u7279\u5730\u53bb\u6307\u5b9a resampling_strategy \uff0c\u67e5\u770b\u8868\u73fe\u662f\u5426\u80fd\u5920\u63d0\u5347\u3002 from autosklearn.experimental.askl2 import AutoSklearn2Classifier automlclassifierV2 = AutoSklearn2Classifier ( time_left_for_this_task = 180 , per_run_time_limit = 40 ) automlclassifierV2 . fit ( X_train , y_train ) # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( 'automlclassifierV2 \u8a13\u7df4\u96c6: ' , automlclassifierV2 . score ( X_train , y_train )) print ( 'automlclassifierV2 \u6e2c\u8a66\u96c6: ' , automlclassifierV2 . 
score ( X_test , y_test )) \u57f7\u884c\u7d50\u679c\uff1a automlclassifierV2 \u8a13\u7df4\u96c6: 0.9904761904761905 automlclassifierV2 \u6e2c\u8a66\u96c6: 0.9333333333333333 \u4f7f\u7528\u4e00\u6a23\u7684\u641c\u5c0b\u6642\u9593\u8207\u8a13\u7df4\u9650\u5236\uff0c\u6700\u7d42\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u90fd\u8868\u73fe\u4e0d\u932f\u3002\u5169\u8005\u7684\u6e96\u78ba\u7387\u66f4\u63a5\u8fd1\u4e86\u3002\u9019\u6a23\u7684\u7d50\u679c\u7684\u78ba\u6bd4\u7cfb\u5217\u6559\u5b78\u6240\u4ecb\u7d39\u7684\u4efb\u4e00\u500b\u55ae\u4e00\u6a21\u578b\u9084\u4f86\u5f97\u597d\u3002 \u67e5\u770b\u6bcf\u500b\u6a21\u578b\u7684\u6b0a\u91cd \u6211\u5011\u53ef\u4ee5\u4f7f\u7528\u6a21\u578b\u63d0\u4f9b\u7684\u65b9\u6cd5\u67e5\u770b\u6700\u7d42\u8a13\u7df4\u7d50\u679c\uff0c\u4e26\u67e5\u770b k \u500b Ensemble \u6a21\u578b\u7684\u8a13\u7df4\u7d50\u679c\u4ee5\u53ca\u6bcf\u500b\u6a21\u578b\u7684\u6b0a\u91cd\u3002 automlclassifierV2 . leaderboard ( detailed = True , ensemble_only = True ) \u8f38\u51fa\u6a21\u578b \u5982\u679c\u60f3\u5c07 AutoML \u7684\u6a21\u578b\u5132\u5b58\u8d77\u4f86\uff0c\u53ef\u4ee5\u900f\u904e joblib \u5c07\u6a21\u578b\u6253\u5305\u532f\u51fa\u3002 from joblib import dump , load # \u532f\u51fa\u6a21\u578b dump ( automlclassifierV2 , 'model.joblib' ) # \u532f\u5165\u6a21\u578b clf = load ( 'model.joblib' ) # \u6a21\u578b\u9810\u6e2c\u6e2c\u8a66 clf . predict ( X_test ) \u8996\u89ba\u5316 AutoML \u6a21\u578b \u9996\u5148\u5b89\u88dd pipelineprofiler \u3002 pip install pipelineprofiler \u900f\u904e PipelineProfiler \u5957\u4ef6\u53ef\u4ee5\u5f88\u5feb\u901f\u5730\u6aa2\u8996\u6a21\u578b\u8a13\u7df4\u7d50\u679c\uff0c\u4ee5\u53ca\u6bcf\u4e00\u500b Ensemble \u6a21\u578b\u7684\u8d85\u53c3\u6578\u4ee5\u53ca\u8cc7\u6599\u524d\u8655\u7406\u65b9\u5f0f\u548c\u7279\u5fb5\u8655\u7406\u65b9\u6cd5\u3002 import PipelineProfiler profiler_data = PipelineProfiler . import_autosklearn ( automlclassifierV2 ) PipelineProfiler . 
plot_pipeline_matrix ( profiler_data ) Reference [1] Feurer, Matthias et al. Efficient and Robust Automated Machine Learning , Advances in neural information processing systems 2015. [2] Feurer, Matthias et al. Supplementary Material for Efficient and Robust Automated Machine Learning , Advances in neural information processing systems 2015. [3] Feurer, Matthias et al. Auto-Sklearn 2.0: Hands-free AutoML via Meta-Learning , arXiv, 2020. [4] Ono, Jorge et al. PipelineProfiler: A Visual Analytics Tool for the Exploration of AutoML Pipelines , arXiv, 2020. Auto Machine Learning\u7b46\u8a18- Bayesian Optimization A Quickstart Guide to Auto-Sklearn (AutoML) for Machine Learning Practitioners Auto-Sklearn: Scikit-Learn on Steroids \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 20] \u6a5f\u5668\u5b78\u7fd2\u91d1\u624b\u6307 - Auto-sklearn"},{"location":"20.Auto-Sklearn/#day-20-auto-sklearn","text":"","title":"[Day 20] \u6a5f\u5668\u5b78\u7fd2\u91d1\u624b\u6307 - Auto-sklearn"},{"location":"20.Auto-Sklearn/#_1","text":"\u4e86\u89e3 Auto-sklearn \u904b\u4f5c\u539f\u7406 Meta Learning Bayesian Optimization Build Ensemble \u5be6\u4f5c Auto-sklearn \u63a1\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u8a13\u7df4\uff0c\u4e26\u6bd4\u8f03\u5169\u7a2e\u4e0d\u540c\u7248\u672c\u7684 Auto-sklearn\u3002 \u4f7f\u7528 pipelineprofiler \u8996\u89ba\u5316 AutoML \u6a21\u578b\u3002 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"20.Auto-Sklearn/#_2","text":"Auto-sklearn \u63a1\u7528\u5143\u5b78\u7fd2 (Meta Learning) \u9078\u64c7\u6a21\u578b\u548c\u8d85\u53c3\u6578\u512a\u5316\u7684\u65b9\u6cd5\u4f5c\u70ba\u641c\u5c0b\u6700\u4f73\u6a21\u578b\u7684\u91cd\u9ede\u3002\u6b64 AutoML \u5957\u4ef6\u4e3b\u8981\u662f\u641c\u5c0b\u6240\u6709 Sklearn 
\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4ee5\u6a21\u578b\u7684\u8d85\u53c3\u6578\uff0c\u4e26\u4f7f\u7528\u8c9d\u8449\u65af\u512a\u5316 (Bayesian Optimization) \u8207\u81ea\u52d5\u6574\u5408 (Ensemble Selection) \u7684\u67b6\u69cb\u5728\u6709\u9650\u6642\u9593\u5167\u641c\u5c0b\u6700\u4f73\u7684\u6a21\u578b\u3002\u7b2c\u4e00\u7248\u7684 Auto-sklearn \u65bc 2015 \u5e74\u767c\u8868\u5728 NIPS(Neural Information Processing Systems) \u6703\u8b70\u4e0a\uff0c\u8ad6\u6587\u540d\u7a31\u70ba Efficient and Robust Automated Machine Learning \u3002\u6709\u5225\u65bc\u5176\u4ed6\u7684 AutoML \u65b9\u6cd5\uff0cAuto-sklearn \u63d0\u51fa\u4e86\u5143\u5b78\u7fd2\u67b6\u69cb\u6539\u5584\u4e86\u8c9d\u8449\u65af\u512a\u5316\u5728\u521d\u59cb\u51b7\u555f\u52d5\u7684\u7f3a\u9ede\uff0c\u4e26\u63d0\u4f9b\u4e00\u500b\u597d\u7684\u63a1\u6a23\u65b9\u5411\u66f4\u5feb\u901f\u5c0b\u627e\u6700\u4f73\u7684\u6a21\u578b[1]\u3002\u7b2c\u4e8c\u500b\u7248\u672c\u65bc 2020 \u5e74\u767c\u5e03\uff0c\u8ad6\u6587\u540d\u7a31\u70ba Auto-Sklearn 2.0: Hands-free AutoML via Meta-Learning \u3002\u5728\u65b0\u7684\u7248\u672c\u4e2d\u4fee\u6539\u4e86\u5143\u5b78\u7fd2\u67b6\u69cb\uff0c\u4e26\u4e0d\u4f9d\u8cf4\u5143\u7279\u5fb5\u4f86\u9078\u64c7\u6a21\u578b\u9078\u64c7\u8207\u8abf\u53c3\u7b56\u7565\u3002\u800c\u662f\u5f15\u5165\u4e86\u4e00\u500b\u5143\u5b78\u7fd2\u7b56\u7565\u9078\u64c7\u5668\uff0c\u6839\u64da\u8cc7\u6599\u96c6\u4e2d\u7684\u6a23\u672c\u6578\u91cf\u548c\u7279\u5fb5\uff0c\u8a02\u5b9a\u4e86\u4e00\u500b\u6a21\u578b\u9078\u64c7\u7684\u7b56\u7565[3]\u3002","title":"\u524d\u8a00"},{"location":"20.Auto-Sklearn/#automl-cash","text":"\u5728\u8ad6\u6587\u4e2d\u4f5c\u8005\u5c07 AutoML \u8996\u70ba\u6f14\u7b97\u6cd5\u9078\u64c7\u548c\u8d85\u53c3\u6578\u512a\u5316 (Combined Algorithm Selection and Hyperparameter, CASH) \u7684\u7d44\u5408\u6700\u4f73\u5316\u554f\u984c\u3002\u56e0\u70ba\u5728 AutoML 
\u9818\u57df\u7576\u4e2d\u5c07\u6703\u9762\u81e8\u5169\u500b\u554f\u984c\u3002\u7b2c\u4e00\u500b\u662f\u6c92\u6709\u4efb\u4f55\u7684\u6f14\u7b97\u6cd5\u6a21\u578b\u662f\u53ef\u4ee5\u4fdd\u8b49\u5728\u6240\u6709\u7684\u8cc7\u6599\u96c6\u4e2d\u8868\u73fe\u6700\u597d\uff0c\u56e0\u6b64\u6311\u9078\u4e00\u500b\u597d\u7684\u6f14\u7b97\u6cd5\u662f\u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2\u7684\u9996\u8981\u4efb\u52d9\u3002\u7b2c\u4e8c\u8a31\u591a\u7684\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u5f80\u5f80\u4f9d\u8cf4\u65bc\u8d85\u53c3\u6578\uff0c\u900f\u904e\u4e0d\u540c\u7684\u8d85\u53c3\u6578\u8a2d\u5b9a\u53ef\u4ee5\u53d6\u5f97\u66f4\u597d\u7684\u5b78\u7fd2\u7d50\u679c\u3002\u4f8b\u5982\u5728 SVM \u65b9\u6cd5\u4e2d\u6211\u5011\u53ef\u4ee5\u8a2d\u5b9a\u4e0d\u540c\u7684\u6838\u6280\u5de7\u8b93\u6a21\u578b\u5177\u6709\u975e\u7dda\u6027\u7684\u80fd\u529b\uff0c\u6216\u662f\u900f\u904e\u8d85\u53c3\u6578 C \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002\u7136\u800c\u8c9d\u8449\u65af\u512a\u5316\u5982\u4eca\u6210\u70ba AutoML \u8d85\u53c3\u6578\u641c\u5c0b\u7684\u91cd\u8981\u6838\u5fc3\u65b9\u6cd5\u3002","title":"AutoML \u8996\u70ba CASH \u554f\u984c"},{"location":"20.Auto-Sklearn/#auto-sklearn","text":"Auto-sklearn \u53ef\u4ee5\u88ab\u62ff\u4f86\u8655\u7406\u8ff4\u6b78\u548c\u5206\u985e\u7684\u554f\u984c\u3002\u4e0b\u5716\u70ba\u7b2c\u4e00\u7248\u8ad6\u6587\u4e2d\u6240\u7e6a\u88fd\u7684\u67b6\u69cb\u5716\u3002\u6211\u5011\u53ef\u4ee5\u5c07 Auto-sklearn 
\u5207\u6210\u4e09\u500b\u90e8\u5206\uff0c\u5176\u4e2d\u7b2c\u4e00\u500b\u662f\u5f15\u5165\u5143\u5b78\u7fd2\u6a5f\u5236\u4f86\u6a21\u4eff\u5c08\u5bb6\u5728\u8655\u7406\u6a5f\u5668\u5b78\u7fd2\u7684\u5148\u9a57\u77e5\u8b58\u3002\u4e26\u63a1\u7528\u5143\u7279\u5fb5\u8b93\u6211\u5011\u66f4\u6709\u6548\u7387\u7684\u53bb\u6c7a\u5b9a\u5728\u65b0\u7684\u8cc7\u6599\u96c6\u4e2d\u8a72\u6311\u9078\u54ea\u4e00\u7a2e\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u3002\u63a5\u8457\u6311\u597d\u6a21\u578b\u5f8c\u4e26\u900f\u904e\u8c9d\u8449\u65af\u512a\u5316\u4f86\u6311\u9078\u5408\u9069\u7684\u6a21\u578b\u8d85\u53c3\u6578\uff0c\u4ee5\u53ca\u5617\u8a66\u4e00\u4e9b\u8cc7\u6599\u524d\u8655\u7406\u8207\u7279\u5fb5\u5de5\u7a0b\u3002\u6700\u5f8c\u6311\u9078\u5e7e\u500b\u4e0d\u932f\u7684\u6a21\u578b\u4e26\u900f\u904e\u6574\u9ad4\u5b78\u7fd2\u7684\u6280\u5de7\u9032\u884c\u6a21\u578b\u5806\u758a\uff0c\u5c07\u8868\u73fe\u4e0d\u932f\u7684\u6a21\u578b\u8f38\u51fa\u7d50\u679c\u505a\u4e00\u500b\u52a0\u6b0a\u548c\u6216\u662f\u6295\u7968\u3002 Meta Learning Bayesian Optimization Build Ensemble","title":"Auto-sklearn \u67b6\u69cb"},{"location":"20.Auto-Sklearn/#meta-learning","text":"\u7576\u6211\u5011\u60f3\u5c0d\u65b0\u8cc7\u6599\u96c6\u505a\u5206\u985e\u6216\u8ff4\u6b78\u6642\uff0cAuto-sklearn 
\u6703\u5148\u63d0\u53d6\u5143\u7279\u5fb5\uff0c\u5177\u6709\u76f8\u4f3c\u5143\u7279\u5fb5\u7684\u8cc7\u6599\u96c6\u5728\u540c\u4e00\u7d44\u8d85\u53c3\u6578\u61c9\u8a72\u6703\u6709\u76f8\u4f3c\u7684\u8868\u73fe\u3002\u56e0\u6b64\u900f\u904e\u5143\u7279\u5fb5\u53ef\u4ee5\u6709\u6548\u5730\u8a55\u4f30\u5728\u65b0\u8cc7\u6599\u96c6\u4e0a\u61c9\u8a72\u4f7f\u7528\u54ea\u7a2e\u7b97\u6cd5\u3002\u5143\u5b78\u7fd2\u5728\u9019\u88e1\u7684\u76ee\u7684\u662f\u70ba\u4e86\u8981\u627e\u4e00\u500b\u4e0d\u932f\u7684\u8d85\u53c3\u6578\u505a\u521d\u59cb\u5316\uff0c\u4f7f\u5176\u5728\u4e00\u958b\u59cb\u7684\u8868\u73fe\u512a\u65bc\u96a8\u6a5f\u7684\u65b9\u6cd5\u3002\u4e26\u63d0\u4f9b\u8c9d\u8449\u65af\u512a\u5316\u6709\u500b\u660e\u78ba\u7684\u65b9\u5411\u3002Auto-sklearn \u53c3\u8003\u4e86 OpenML 140 \u500b\u8cc7\u6599\u96c6\uff0c\u4e26\u5f59\u6574\u4e86 38 \u500b\u5143\u7279\u5fb5\uff0c\u4f8b\u5982\uff1a\u504f\u5ea6\u3001\u5cf0\u5ea6\u3001\u7279\u5fb5\u6578\u91cf\u3001\u985e\u5225\u6578\u91cf......\u7b49\u3002\u9996\u5148\u70ba\u9019 140 \u500b\u8cc7\u6599\u96c6\u4f7f\u7528\u8c9d\u8449\u65af\u512a\u5316\u9032\u884c\u6a21\u578b\u8a13\u7df4\uff0c\u4e26\u5c07\u9019\u4e9b\u8cc7\u6599\u96c6\u5c0d\u61c9\u7684\u6a21\u578b\u8207\u6700\u4f73\u7684\u8d85\u53c3\u6578\u5132\u5b58\u8d77\u4f86\u3002\u7576\u6709\u65b0\u7684\u8cc7\u6599\u96c6\u9032\u4f86\u6642\u6703\u5148\u900f\u904e\u5143\u7279\u5fb5\u9032\u884c\u76f8\u4f3c\u5ea6\u5339\u914d\uff0c\u4e26\u5c07\u5339\u914d\u7a0b\u5ea6\u6700\u9ad8\u7684\u524d k \u500b\u8cc7\u6599\u96c6 (\u9810\u8a2dk=25) \u6240\u5c0d\u61c9\u7684\u6a21\u578b\u548c\u8d85\u53c3\u6578\u4f5c\u70ba\u8c9d\u8449\u65af\u512a\u5316\u7684\u521d\u59cb\u8a2d\u5b9a\u3002","title":"Meta Learning"},{"location":"20.Auto-Sklearn/#bayesian-optimization","text":"\u5728\u8c9d\u8449\u65af\u512a\u5316\u7576\u4e2d\u4e3b\u8981\u6703\u5c0b\u627e\u8a72\u8cc7\u6599\u96c6\u4e2d\u6700\u5408\u9069\u7684\u8cc7\u6599\u524d\u8655\u7406 (data pre-processors)\u3001\u7279\u5fb5\u524d\u8655\u7406 
(feature pre-processors) \u8207\u5206\u985e/\u8ff4\u6b78\u6a21\u578b\u3002\u4ee5\u4e0a\u4e09\u5927\u985e\u5408\u8a08\u5171\u6709 110 \u500b\u8d85\u53c3\u6578\u5fc5\u9808\u900f\u904e\u8c9d\u8449\u65af\u512a\u5316\u4f86\u5c0b\u627e\u6700\u9069\u5408\u7684\u53c3\u6578\u7d44\u5408\u3002\u5176\u8c9d\u8449\u65af\u512a\u5316\u4e3b\u8981\u65b9\u6cd5\u662f\u900f\u904e\u5efa\u7acb\u76ee\u6a19\u51fd\u6578\u7684\u6a5f\u7387\u6a21\u578b\uff0c\u4e26\u7528\u5b83\u4f86\u9078\u64c7\u6700\u6709\u5e0c\u671b\u7684\u8d85\u53c3\u6578\u4f86\u8a55\u4f30\u771f\u5be6\u7684\u76ee\u6a19\u51fd\u6578\u3002 \u4ee5\u4e0b\u5167\u5bb9\u6458\u9304\u81ea Auto-sklearn v1.0 \u8ad6\u6587\u63d0\u4f9b\u7684\u5167\u5bb9 [1][2]","title":"Bayesian Optimization"},{"location":"20.Auto-Sklearn/#data-pre-processors","text":"\u5728\u8cc7\u6599\u524d\u8655\u7406\u90e8\u5206 Auto-sklearn \u63d0\u4f9b\u4e86\u56db\u7a2e\u65b9\u6cd5\u3002\u5305\u542b\u7279\u5fb5\u7e2e\u653e\u3001\u586b\u88dc\u7f3a\u5931\u503c\u3001\u985e\u5225\u7279\u5fb5\u9032\u884c one-hot encoding \u8207\u8655\u7406\u76ee\u6a19\u8f38\u51fa\u985e\u5225\u6578\u91cf\u4e0d\u5e73\u8861\u554f\u984c\u3002 Data Pre-processors \u7279\u5fb5\u7e2e\u653e \u586b\u88dc\u7f3a\u5931\u503c one-hot encoding \u985e\u5225\u8cc7\u6599\u4e0d\u5e73\u8861 \u5728\u65b0\u7684\u7248\u672c\u4e2d\u591a\u4e86\u4e00\u4e9b\u8cc7\u6599\u524d\u8655\u7406\u65b9\u6cd5\uff0c\u8a73\u7d30\u53ef\u4ee5\u53c3\u8003 Auto-sklearn data_preprocessing \u7684\u539f\u59cb\u7a0b\u5f0f\u3002","title":"Data Pre-processors"},{"location":"20.Auto-Sklearn/#feature-pre-processors","text":"\u5728\u7279\u5fb5\u524d\u8655\u7406\u90e8\u5206 Auto-sklearn \u63d0\u4f9b\u4e86 12 \u7a2e\u7279\u5fb5\u8655\u7406\u7684\u6280\u5de7\uff0c\u7136\u800c\u5728\u773e\u591a\u65b9\u6cd5\u4e2d\u50c5\u6703\u6311\u9078\u5176\u4e2d\u4e00\u7a2e\u3002 \u8a73\u7d30\u53ef\u4ee5\u53c3\u8003 Auto-sklearn feature_preprocessing \u7684\u539f\u59cb\u7a0b\u5f0f\u3002","title":"Feature 
Pre-processors"},{"location":"20.Auto-Sklearn/#build-ensemble","text":"\u5728 Auto-sklearn \u8a13\u7df4\u968e\u6bb5\u6703\u7522\u751f\u8a31\u591a\u8868\u73fe\u512a\u826f\u7684\u6a21\u578b\uff0c\u6700\u7d42\u900f\u904e\u8caa\u5a6a\u6cd5\u7684 Bagging Ensemble Selection \u65b9\u6cd5\u4f86\u5408\u4f75\u591a\u500b\u6a21\u578b\u7d44\u5408\u6210\u4e00\u500b\u66f4\u5f37\u66f4\u5927\u7684\u6a21\u578b\uff0c\u4e26\u63d0\u9ad8\u9810\u6e2c\u7684\u6e96\u78ba\u6027\u3002\u4e0b\u5716\u70ba\u7b2c\u4e00\u7248\u8ad6\u6587\u4e2d\u9032\u884c\u7684\u5be6\u9a57\uff0c\u5176\u4e2d\u6a6b\u8ef8\u70ba\u7a0b\u5f0f\u57f7\u884c\u6642\u9593\uff0c\u7e31\u8ef8\u70ba\u5728\u6642\u9593\u5167\u641c\u5c0b\u5230\u7684\u6700\u4f73\u6a21\u578b\u7684\u6392\u540d\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7da0\u8272\u7dda\u689d\u5728\u52a0\u5165\u4e86\u6574\u9ad4\u5b78\u7fd2\u6a5f\u5236\u8868\u73fe\u6548\u679c\u6bd4\u5c1a\u672a\u52a0\u5165\u7684\u85cd\u8272\u7dda\u689d\u5be6\u9a57\u4f86\u5f97\u597d\u3002\u4e26\u4e14\u5728\u77ed\u6642\u9593\u5167\u5c31\u53ef\u4ee5\u5f97\u5230\u4e0d\u932f\u7684\u7d50\u679c\u3002","title":"Build Ensemble"},{"location":"20.Auto-Sklearn/#auto-sklearn_1","text":"\u76ee\u524d Auto-sklearn \u50c5\u652f\u63f4 Linux \u7cfb\u7d71\u3002\u82e5\u6c92\u6709\u6b64\u7cfb\u7d71\u7684\u8b80\u8005\u53ef\u4ee5\u900f\u904e Colab \u9ad4\u9a57\u3002\u53e6\u5916\u82e5\u5b89\u88dd\u904e\u7a0b\u4e2d\u51fa\u73fe\u932f\u8aa4\uff0c\u5fc5\u9808\u5148\u78ba\u8a8d swig \u662f\u5426\u5df2\u5b8c\u6210\u5b89\u88dd\u3002 pip install auto-sklearn \u82e5\u4f7f\u7528 Colab \u57f7\u884c\uff0c\u5b89\u88dd\u5b8c\u6210\u5f8c\u9ede\u9078\u4e0a\u65b9\u5de5\u5177\u5217 Runtime -> Restart runtime \u91cd\u555f\u624d\u80fd\u6b63\u5e38\u57f7\u884c\u6b64\u5957\u4ef6\u3002","title":"\u5b89\u88dd Auto-sklearn"},{"location":"20.Auto-Sklearn/#_3","text":"\u672c\u6b21\u7bc4\u4f8b\u6cbf\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\uff0c\u4e26\u4f7f\u7528 Auto-sklearn 
\u4f86\u641c\u5c0b\u6700\u4f73\u7684\u5206\u985e\u5668\u6a21\u578b\u3002\u6b64\u5916\u5927\u5bb6\u53ef\u4ee5\u8a66\u8457\u89c0\u5bdf Auto-sklearn \u627e\u5230\u7684\u6700\u4f73\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u4e0a\u7684\u8868\u73fe\uff0c\u4e26\u8207\u524d\u5e7e\u5929\u6240\u4ecb\u7d39\u7684\u90a3\u4e9b\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4f86\u505a\u6bd4\u8f03\u3002 import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris iris = load_iris () df_data = pd . DataFrame ( data = np . c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data","title":"\u8f09\u5165\u8cc7\u6599\u96c6"},{"location":"20.Auto-Sklearn/#_4","text":"\u6211\u5011\u6309\u7167\u82b1\u6735\u7a2e\u985e\u7684\u6578\u91cf\u5c0d\u8cc7\u6599\u96c6\u4ee5 7:3 \u7684\u6bd4\u4f8b\u5207\u5272\u51fa\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u3002\u5176\u4e2d\u53c3\u6578 stratify=y \u8a2d\u5b9a\u662f\u78ba\u4fdd\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u5c0d\u65bc\u4e09\u7a2e\u82b1\u6735\u985e\u5225\u7684\u6bd4\u4f8b\u5728\u9019\u5169\u500b\u5207\u51fa\u4f86\u7684\u8cc7\u6599\u96c6\u4e2d\u6bd4\u4f8b\u8981\u4e00\u6a23\uff0c\u4ee5\u514d\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u6709\u5f88\u5927\u7684\u504f\u5dee\u3002 from sklearn.model_selection import train_test_split X = df_data . drop ( labels = [ 'Species' ], axis = 1 ) . values # \u79fb\u9664Species\u4e26\u53d6\u5f97\u5269\u4e0b\u6b04\u4f4d\u8cc7\u6599 y = df_data [ 'Species' ] . values X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.3 , random_state = 42 , stratify = y ) print ( 'train shape:' , X_train . shape ) print ( 'test shape:' , X_test . 
shape ) \u8f38\u51fa\u7d50\u679c\uff1a train shape: (105, 4) test shape: (45, 4)","title":"\u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6"},{"location":"20.Auto-Sklearn/#auto-sklearn_2","text":"\u4ee5\u4e0b\u662f\u6a21\u578b\u5e38\u7528\u7684\u8d85\u53c3\u6578\u4ee5\u53ca\u65b9\u6cd5\uff0c\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u5b98\u65b9 API \u6587\u4ef6 \u3002 Parameters: - time_left_for_this_task: \u641c\u5c0b\u6642\u9593(\u79d2)\uff0c\u9810\u8a2d3600\u79d2(60\u5206\u9418)\u3002 - per_run_time_limit: \u6bcf\u500b\u6a21\u578b\u8a13\u7df4\u7684\u4e0a\u9650\u6642\u9593\uff0c\u9810\u8a2d\u70batime_left_for_this_task\u76841/10\u3002 - ensemble_size: \u6a21\u578b\u8f38\u51fa\u6578\u91cf\uff0c\u9810\u8a2d50\u3002 - resampling_strategy: \u8cc7\u6599\u63a1\u6a23\u65b9\u5f0f\u3002\u70ba\u4e86\u907f\u514d\u904e\u64ec\u5408\uff0c\u53ef\u4ee5\u63a1\u7528\u4ea4\u53c9\u9a57\u8b49\u6a5f\u5236\u3002\u9810\u8a2d\u65b9\u6cd5\u70ba\u6700\u57fa\u672c\u7684 holdout\u3002 Attributes: - cv_results_: \u67e5\u8a62\u6a21\u578b\u641c\u5c0b\u7d50\u679c\u4ee5\u53ca\u6bcf\u500b\u6700\u4f73\u6a21\u578b\u7684\u8d85\u53c3\u6578\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - refit: \u4f7f\u7528 fit \u5c0b\u627e\u597d\u7684\u53c3\u6578\u5f8c\uff0c\u518d\u4f7f\u7528\u6240\u6709\u7684\u8cc7\u6599\u9032\u884c\u6700\u5f8c\u5fae\u8abf\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 - leaderboard: \u986f\u793a k \u500b ensemble \u6a21\u578b\u4e26\u6392\u540d\u3002 \u9996\u5148\u6211\u5011\u4f86\u6e2c\u8a66\u7b2c\u4e00\u7248\u7684 
Auto-sklearn\uff0c\u5efa\u7acb\u4e00\u500b\u5206\u985e\u5668\u985e\u578b\u7684\u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u4e26\u8a2d\u5b9a\u76f8\u95dc\u7684\u57f7\u884c\u53c3\u6578\u3002\u5728\u672c\u6b21\u5be6\u9a57\u4e2d\u6211\u5011\u8a2d\u5b9a\u6a21\u578b\u641c\u5c0b\u7e3d\u6642\u9593\u70ba 180 \u79d2\uff0c\u6bcf\u500b\u6a21\u578b\u8a13\u7df4\u6642\u9593\u9650\u5236 40 \u79d2\u5167\u3002\u6b64\u5916\u8a2d\u5b9a resampling_strategy='cv' \u5373 K-Fold \u4ea4\u53c9\u9a57\u8b49\u3002\u6b64\u5916\u5fc5\u9808\u53e6\u5916\u8a2d\u5b9a resampling_strategy_arguments \u4e26\u7d66\u4e88 k=5\uff0c\u8a13\u7df4\u96c6\u5207\u5272\u70ba\u4e94\u7b49\u4efd\u3002\u9019\u610f\u5473\u8457\u76f8\u540c\u7684\u6a21\u578b\u8981\u8a13\u7df4\u4e94\u6b21\uff0c\u6bcf\u4e00\u6b21\u7684\u8a13\u7df4\u90fd\u6703\u5f9e\u9019\u4e94\u7b49\u4efd\u6311\u9078\u5176\u4e2d\u56db\u7b49\u4efd\u4f5c\u70ba\u8a13\u7df4\u8cc7\u6599\uff0c\u5269\u4e0b\u4e00\u7b49\u4efd\u672a\u53c3\u8207\u8a13\u7df4\u4e26\u4f5c\u70ba\u9a57\u8b49\u96c6\u3002 import autosklearn.classification automlclassifierV1 = autosklearn . classification . AutoSklearnClassifier ( time_left_for_this_task = 180 , per_run_time_limit = 40 , resampling_strategy = 'cv' , resampling_strategy_arguments = { 'folds' : 5 } ) automlclassifierV1 . fit ( X_train , y_train ) \u8a13\u7df4\u7d50\u675f\u5f8c\u6211\u5011\u53ef\u4ee5\u4f86\u67e5\u770b\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u8868\u73fe\u3002\u5927\u5bb6\u53ef\u4ee5\u8a66\u8457\u8abf\u6574\u6a21\u578b\u8a13\u7df4\u6642\u9593\u4ee5\u53ca\u4e00\u4e9b\u63a7\u5236\u53c3\u6578\uff0c\u67e5\u770b\u662f\u5426\u6709\u6c92\u6709\u5e6b\u52a9\u6a21\u578b\u6e96\u78ba\u5ea6\u63d0\u5347\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( 'automlclassifierV1 \u8a13\u7df4\u96c6: ' , automlclassifierV1 . score ( X_train , y_train )) print ( 'automlclassifierV1 \u6e2c\u8a66\u96c6: ' , automlclassifierV1 . 
score ( X_test , y_test )) \u8f38\u51fa\u7d50\u679c\uff1a automlclassifierV1 \u8a13\u7df4\u96c6: 0.9904761904761905 automlclassifierV1 \u6e2c\u8a66\u96c6: 0.9111111111111111","title":"Auto-sklearn"},{"location":"20.Auto-Sklearn/#auto-sklearn-20","text":"\u5728\u7b2c\u4e8c\u7248\u7684 Auto-sklearn \u5c0d\u6a21\u578b\u641c\u5c0b\u9032\u884c\u4e86\u4e00\u4e9b\u512a\u5316\uff0c\u4e26\u4e14\u53ef\u4ee5\u81ea\u52d5\u641c\u5c0b\u597d\u7684\u8cc7\u6599\u63a1\u6a23\u65b9\u5f0f\u3002\u56e0\u6b64\u6211\u5011\u4e0d\u7279\u5730\u53bb\u6307\u5b9a resampling_strategy \uff0c\u67e5\u770b\u8868\u73fe\u662f\u5426\u80fd\u5920\u63d0\u5347\u3002 from autosklearn.experimental.askl2 import AutoSklearn2Classifier automlclassifierV2 = AutoSklearn2Classifier ( time_left_for_this_task = 180 , per_run_time_limit = 40 ) automlclassifierV2 . fit ( X_train , y_train ) # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( 'automlclassifierV2 \u8a13\u7df4\u96c6: ' , automlclassifierV2 . score ( X_train , y_train )) print ( 'automlclassifierV2 \u6e2c\u8a66\u96c6: ' , automlclassifierV2 . 
score ( X_test , y_test )) \u57f7\u884c\u7d50\u679c\uff1a automlclassifierV2 \u8a13\u7df4\u96c6: 0.9904761904761905 automlclassifierV2 \u6e2c\u8a66\u96c6: 0.9333333333333333 \u4f7f\u7528\u4e00\u6a23\u7684\u641c\u5c0b\u6642\u9593\u8207\u8a13\u7df4\u9650\u5236\uff0c\u6700\u7d42\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u90fd\u8868\u73fe\u4e0d\u932f\u3002\u5169\u8005\u7684\u6e96\u78ba\u7387\u66f4\u63a5\u8fd1\u4e86\u3002\u9019\u6a23\u7684\u7d50\u679c\u7684\u78ba\u6bd4\u7cfb\u5217\u6559\u5b78\u6240\u4ecb\u7d39\u7684\u4efb\u4e00\u500b\u55ae\u4e00\u6a21\u578b\u9084\u4f86\u5f97\u597d\u3002","title":"\u4f7f\u7528 Auto-sklearn 2.0"},{"location":"20.Auto-Sklearn/#_5","text":"\u6211\u5011\u53ef\u4ee5\u4f7f\u7528\u6a21\u578b\u63d0\u4f9b\u7684\u65b9\u6cd5\u67e5\u770b\u6700\u7d42\u8a13\u7df4\u7d50\u679c\uff0c\u4e26\u67e5\u770b k \u500b Ensemble \u6a21\u578b\u7684\u8a13\u7df4\u7d50\u679c\u4ee5\u53ca\u6bcf\u500b\u6a21\u578b\u7684\u6b0a\u91cd\u3002 automlclassifierV2 . leaderboard ( detailed = True , ensemble_only = True )","title":"\u67e5\u770b\u6bcf\u500b\u6a21\u578b\u7684\u6b0a\u91cd"},{"location":"20.Auto-Sklearn/#_6","text":"\u5982\u679c\u60f3\u5c07 AutoML \u7684\u6a21\u578b\u5132\u5b58\u8d77\u4f86\uff0c\u53ef\u4ee5\u900f\u904e joblib \u5c07\u6a21\u578b\u6253\u5305\u532f\u51fa\u3002 from joblib import dump , load # \u532f\u51fa\u6a21\u578b dump ( automlclassifierV2 , 'model.joblib' ) # \u532f\u5165\u6a21\u578b clf = load ( 'model.joblib' ) # \u6a21\u578b\u9810\u6e2c\u6e2c\u8a66 clf . 
predict ( X_test )","title":"\u8f38\u51fa\u6a21\u578b"},{"location":"20.Auto-Sklearn/#automl","text":"\u9996\u5148\u5b89\u88dd pipelineprofiler \u3002 pip install pipelineprofiler \u900f\u904e PipelineProfiler \u5957\u4ef6\u53ef\u4ee5\u5f88\u5feb\u901f\u5730\u6aa2\u8996\u6a21\u578b\u8a13\u7df4\u7d50\u679c\uff0c\u4ee5\u53ca\u6bcf\u4e00\u500b Ensemble \u6a21\u578b\u7684\u8d85\u53c3\u6578\u4ee5\u53ca\u8cc7\u6599\u524d\u8655\u7406\u65b9\u5f0f\u548c\u7279\u5fb5\u8655\u7406\u65b9\u6cd5\u3002 import PipelineProfiler profiler_data = PipelineProfiler . import_autosklearn ( automlclassifierV2 ) PipelineProfiler . plot_pipeline_matrix ( profiler_data )","title":"\u8996\u89ba\u5316 AutoML \u6a21\u578b"},{"location":"20.Auto-Sklearn/#reference","text":"[1] Feurer, Matthias et al. Efficient and Robust Automated Machine Learning , Advances in neural information processing systems 2015. [2] Feurer, Matthias et al. Supplementary Material for Efficient and Robust Automated Machine Learning , Advances in neural information processing systems 2015. [3] Feurer, Matthias et al. Auto-Sklearn 2.0: Hands-free AutoML via Meta-Learning , arXiv, 2020. [4] Ono, Jorge et al. PipelineProfiler: A Visual Analytics Tool for the Exploration of AutoML Pipelines , arXiv, 2020. 
Auto Machine Learning\u7b46\u8a18- Bayesian Optimization A Quickstart Guide to Auto-Sklearn (AutoML) for Machine Learning Practitioners Auto-Sklearn: Scikit-Learn on Steroids \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"21.Optuna/","text":"[Day 21] \u8abf\u6574\u6a21\u578b\u8d85\u53c3\u6578\u5229\u5668 - Optuna \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 Optuna \u5982\u4f55\u63a1\u6a23\u53c3\u6578\uff1f \u5be6\u4f5c Optuna \u641c\u5c0b\u6700\u4f73\u8d85\u53c3\u6578 \u4ee5 XGBoost \u8ff4\u6b78\u6a21\u578b\u65bc\u623f\u50f9\u9810\u6e2c\u70ba\u4f8b Optuna \u8996\u89ba\u5316\u5206\u6790\u641c\u5c0b\u7d50\u679c \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 \u4f60\u662f\u5426\u66fe\u7d93\u89ba\u5f97\u6a21\u578b\u6709\u592a\u591a\u7684\u8d85\u53c3\u6578\u800c\u611f\u5230\u53ad\u7169\u55ce\uff1f\u8981\u5f9e\u67d0\u4e00\u500b\u6f14\u7b97\u6cd5\u5f97\u5230\u597d\u7684\u89e3\u5fc5\u9808\u8981\u8abf\u6574\u8d85\u53c3\u6578\uff0c\u6240\u8b02\u7684\u8d85\u53c3\u6578\u5c31\u662f\u63a7\u5236\u8a13\u7df4\u6a21\u578b\u7684\u4e00\u7d44\u795e\u79d8\u6578\u5b57\uff0c\u4f8b\u5982\u5b78\u7fd2\u901f\u7387\u5c31\u662f\u4e00\u7a2e\u8d85\u53c3\u6578\u3002\u4f60\u6c38\u9060\u90fd\u4e0d\u77e5\u9053 0~1 \u4e4b\u9593\u54ea\u4e00\u500b\u6578\u5b57\u662f\u6700\u9069\u5408\u7684\uff0c\u552f\u4e00\u7684\u65b9\u6cd5\u5c31\u662f\u8a66\u932f (trial and error)\u3002\u90a3\u842c\u4e00\u6a21\u578b\u6709\u591a\u500b\u8d85\u53c3\u6578\u53ef\u4ee5\u63a7\u5236\uff0c\u8c48\u4e0d\u662f\u5c31\u6709\u6210\u5343\u4e0a\u842c\u7a2e\u7d44\u5408\u8981\u6162\u6162\u5617\u8a66\u55ce\uff1f\u5982\u679c\u4f60\u6709\u4e5f\u9019\u500b\u554f\u984c\uff0c\u770b\u9019\u7bc7\u5c31\u5c0d\u4e86\uff01\u96d6\u7136\u4f60\u53ef\u80fd\u807d\u904e Sklearn \u7684 GridSearchCV \u540c\u6a23\u4e5f\u662f\u66b4\u529b\u7684\u627e\u51fa\u6700\u4f73\u53c3\u6578\uff0c\u6216\u662f\u4f7f\u7528 RandomizedSearchCV 
\u6307\u5b9a\u8d85\u53c3\u6578\u7684\u7bc4\u570d\u4e26\u96a8\u6a5f\u7684\u62bd\u53d6\u53c3\u6578\u9032\u884c\u8a13\u7df4\uff0c\u5b83\u5011\u7684\u5171\u540c\u7f3a\u9ede\u662f\u975e\u5e38\u8017\u6642\u8207\u4f54\u7528\u6a5f\u5668\u8cc7\u6e90\u3002\u9019\u88e1\u6211\u5011\u8981\u4f86\u4ecb\u7d39 Optuna \u9019\u500b\u81ea\u52d5\u627e\u8d85\u53c3\u6578\u7684\u65b9\u4fbf\u5de5\u5177\uff0c\u4e26\u4e14\u53ef\u4ee5\u548c\u591a\u500b\u5e38\u7528\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u6574\u5408\u3002Optuna \u900f\u904e\u8abf\u6574\u9069\u7576\u7684\u8d85\u53c3\u6578\u4f86\u63d0\u9ad8\u6a21\u578b\u9810\u6e2c\u80fd\u529b\uff0c\u6b64\u5c08\u6848\u6700\u521d\u65bc 2019 \u767c\u8868\u65bc arxiv \u7684\u4e00\u7bc7\u8ad6\u6587 Optuna: A Next-generation Hyperparameter Optimization Framework \u540c\u6642\u958b\u6e90\u5728 GitHub \u4e0a\u514d\u8cbb\u63d0\u4f9b\u5927\u5bb6\u4f7f\u7528\u3002\u540c\u6642 Optuna \u4e5f\u662f 2021 \u5e74 Kaggle \u8cc7\u6599\u79d1\u5b78\u7af6\u8cfd\u4e2d\u6700\u5e38\u898b\u7684\u6a21\u578b\u8abf\u53c3\u5de5\u5177\u3002\u90a3\u662f\u4ec0\u9ebc\u539f\u56e0\u8b93 Optuna \u53d7\u5230\u5ee3\u5927\u7684\u6a5f\u5668\u5b78\u7fd2\u793e\u7fa4\u5982\u6b64\u7684\u6b61\u8fce\u5462\uff1f\u5c31\u8b93\u6211\u5011\u4f86\u770b\u770b\u4ed6\u662f\u5982\u6b64\u5730\u5f37\u5927\u5427\uff01 \u95dc\u65bc Optuna Optuna \u662f\u4e00\u500b\u5c08\u70ba\u6a5f\u5668\u5b78\u7fd2\u8a2d\u8a08\u7684\u81ea\u52d5\u8d85\u53c3\u6578\u512a\u5316\u7684\u6846\u67b6\u3002\u5176\u6700\u7a81\u51fa\u7684\u7279\u9ede\u662f\uff1a \u4eba\u6027\u5316\u7684\u5b9a\u7fa9\u641c\u7d22\u7a7a\u9593\u3002 \u652f\u63f4\u5927\u591a\u6578 ML \u8207 DL \u7684\u5b78\u7fd2\u5957\u4ef6\u3002\u4f8b\u5982: Sklearn\u3001PyTorch\u3001TensorFlow\u3001XGBoost\u3001LightGBM\u3001 CatBoost...\u7b49\u3002 \u5c0d\u641c\u7d22\u7d50\u679c\u63d0\u4f9b\u53ef\u89e3\u91cb\u6027(XAI)\u3002 \u5132\u5b58\u6b77\u53f2\u6700\u4f73\u7684\u53c3\u6578\u5be6\u73fe\u5e73\u884c\u512a\u5316\u5de5\u4f5c\u3002 
\u6c7a\u5b9a\u4e26\u7d42\u6b62\u4e0d\u6eff\u8db3\u9810\u5b9a\u7fa9\u689d\u4ef6\u7684\u8a66\u9a57\u3002 Optuna \u7c21\u55ae\u7bc4\u4f8b \u9019\u88e1\u6211\u5011\u8a2d\u5b9a\u4e00\u500b\u7c21\u55ae\u7684\u76ee\u6a19\u51fd\u5f0f $(x1+2)^2 + (x2-4)^2$\u3002\u6211\u5011\u90fd\u77e5\u9053\u7576\u9019\u500b\u5f0f\u5b50 x1=-2, x2=4 \u6642\u5c07\u6703\u6709\u6975\u5c0f\u503c 0\u3002\u56e0\u6b64\u6211\u5011\u5c31\u7528\u9019\u500b\u7c21\u55ae\u7684\u4f8b\u5b50\u900f\u904e Optuna \u627e\u51fa\u9019\u500b\u51fd\u5f0f\u4e2d\u6975\u5c0f\u503c\u6240\u5c0d\u61c9\u7684 x1 \u8207 x2 \u5427\u3002 import optuna def objective ( trial ): x1 = trial . suggest_float ( \"x1\" , - 5 , 5 ) x2 = trial . suggest_float ( \"x2\" , - 5 , 5 ) return ( x1 + 2 ) ** 2 + ( x2 - 4 ) ** 2 \u9996\u5148\u8f09\u5165 optuna \u5957\u4ef6\uff0c\u5982\u679c\u5c1a\u672a\u5b89\u88dd\u6b64\u5957\u4ef6\u7684\u7684\u8b80\u8005\u53ef\u4ee5\u53c3\u8003\u4ee5\u4e0b\u6307\u4ee4\u9032\u884c\u5b89\u88dd\uff1a pip install optuna \u63a5\u8457\u6211\u5011\u4f86\u5b9a\u7fa9\u4e00\u500b\u627e\u51fa\u6975\u5c0f\u503c\u7684\u76ee\u6a19\u51fd\u5f0f objective() \u3002\u5728\u9019\u500b\u51fd\u5f0f\u4e2d\u6211\u5011\u5c07\u8981\u8a2d\u5b9a optuna \u53ef\u4ee5\u53bb\u5c0b\u627e\u7684\u4e00\u53c3\u6578\uff0c\u4e5f\u5c31\u662f x1 \u8207 x2\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e optuna \u6240\u63d0\u4f9b\u7684 trial \u7269\u4ef6\u4f86\u70ba\u6211\u5011\u7684\u8d85\u53c3\u6578\u8a2d\u5b9a\u4e00\u7d44\u7bc4\u570d\u3002\u5176\u4e2d\u5b83\u6709\u4e00\u500b suggest_float \u65b9\u6cd5\uff0c\u8a72\u65b9\u6cd5\u63a1\u7528\u8d85\u53c3\u6578\u7684\u540d\u7a31\u548c\u7bc4\u570d\u4f86\u5c0b\u627e\u5176\u6700\u4f73\u503c\u3002\u6211\u5011\u4ee5 x1 \u4f86\u8209\u4f8b\uff1a x1 = trial.suggest_float(\"x1\", -5, 5) \u4e0a\u9762\u9019\u4e00\u6bb5\u7a0b\u5f0f\u5728 GridSearch \u4e2d\u53ef\u4ee5\u8868\u793a\u6210 {\"x1\": np.arange(-5, 5, .1)} \u3002\u5373\u8868\u793a\u641c\u5c0b\u904e\u7a0b\u4e2d\u6211\u5011\u6703\u5f9e x1 \u96a8\u6a5f\u8a2d\u5b9a 
-5~5 \u4e4b\u9593\u7684\u4efb\u4e00\u6d6e\u9ede\u6578\u3002\u8a2d\u5b9a\u5b8c\u51fd\u5f0f\u5f8c\u5c31\u53ef\u4ee5\u958b\u59cb\u512a\u5316\u4e86\uff0c\u6211\u5011\u5f9e optuna \u5efa\u7acb\u4e00\u500b study \u7269\u4ef6\uff0c\u4e26\u5c07 objective \u51fd\u6578\u50b3\u905e\u7d66 study \u7684 optimize \u65b9\u6cd5\u3002\u7531\u65bc\u6211\u5011\u7684\u76ee\u6a19\u662f\u8981\u627e\u51fa\u51fd\u5f0f\u4e2d\u7684\u6975\u5c0f\u503c\uff0c\u56e0\u6b64 direction \u8a2d\u70ba minimize \u3002\u53e6\u5916\u5728 optimize \u65b9\u6cd5\u4e2d\u6211\u5011\u4e5f\u53ef\u4ee5\u8a2d\u5b9a\u8a66\u9a57\u7684\u6b21\u6578(n_trials)\u6216\u6642\u9593(timeout)\u3002\u4e00\u5207\u5c31\u7dd2\u5f8c\u5373\u53ef\u958b\u59cb\u57f7\u884c\uff01\u4ee5\u4e0b\u7bc4\u4f8b\u662f\u8fed\u4ee350\u6b21\u4e26\u5f9e\u4e2d\u627e\u5230\u4e00\u7d44\u6700\u4f73\u7684 x1 \u8207 x2 \u4f7f\u5176\u76ee\u6a19\u51fd\u5f0f\u53ef\u4ee5\u6700\u5c0f\u5316\u3002\u8dd1\u5b8c 50 \u6b21\u5f8c\u6211\u5011\u53ef\u4ee5\u7d93\u7531 study \u8b8a\u6578\u4e2d\u5f97\u5230\u4e00\u7d44\u6700\u4f73\u7684\u89e3\u3002\u8a66\u9a57\u7d50\u675f\u5f8c\u6211\u5011\u53ef\u4ee5\u767c\u73fe x1 \u8da8\u8fd1\u65bc -2 \u548c x2 \u8da8\u8fd1\u65bc 4\u3002 %% time # Creating Optuna object and defining its parameters study = optuna . create_study ( direction = 'minimize' ) study . optimize ( objective , n_trials = 50 ) # Showing optimization results print ( 'Number of finished trials:' , len ( study . trials )) print ( 'Best trial parameters:' , study . best_trial . params ) print ( 'Best score:' , study . best_value ) \u8f38\u51fa\u7d50\u679c\uff1a Number of finished trials: 50 Best trial parameters: {'x1': -1.8154924755761588, 'x2': 3.9141985823539844} Best score: 0.04140490983908035 CPU times: user 432 ms, sys: 46.3 ms, total: 478 ms Wall time: 431 ms \u7531\u4e0a\u8ff0\u7684\u7c21\u55ae\u4f8b\u5b50\u6211\u5011\u53ef\u4ee5\u77e5\u9053\u5efa\u7acb\u4e00\u500b optuna \u6700\u4f73\u5316\u6d41\u7a0b\u50c5\u9700\u8981\u4e09\u6b65\u9a5f\uff1a 1. 
\u5efa\u7acb objective \u51fd\u5f0f\u8207\u8a2d\u5b9a trial\uff0c\u4e26\u56de\u50b3 loss\u3002 2. \u5efa\u7acb create_study() \u7269\u4ef6\u3002 3. \u4f7f\u7528 optimize() \u57f7\u884c\u641c\u5c0b\u3002 End-to-end example with XGBoost \u6211\u5011\u4ee5 Sklearn \u6240\u63d0\u4f9b\u7684\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6\u4f86\u505a\u7bc4\u4f8b\u3002\u6b64\u8cc7\u6599\u96c6\u5171\u6709 506 \u7b46\u8cc7\u6599\uff0c\u5176\u4e2d\u8f38\u5165\u7279\u5fb5\u6709 13 \u500b\u5176\u8f38\u51fa\u70ba\u9810\u6e2c\u8a72\u7b46\u8cc7\u6599\u7684\u623f\u50f9\u3002\u7531\u65bc\u60f3\u8981\u5feb\u901f\u793a\u7bc4\u5982\u4f55\u4f7f\u7528 optuna\uff0c\u56e0\u6b64\u9019\u88e1\u5c31\u4e0d\u505a\u4efb\u4f55\u8cc7\u6599 EDA \u8207\u524d\u8655\u7406\u3002 from sklearn.datasets import load_boston X , y = load_boston ( return_X_y = True ) print ( 'X:' , X . shape ) print ( 'y:' , y . shape ) \u8f38\u51fa\u7d50\u679c\uff1a X: (506, 13) y: (506,) \u8cc7\u6599\u96c6\u6210\u529f\u88ab\u8f09\u5165\u5f8c\u6211\u5011\u5c31\u53ef\u4ee5\u5efa\u7acb\u4e00\u500b objective \u51fd\u5f0f\u3002\u5728\u9019\u500b\u76ee\u6a19\u51fd\u5f0f\u4e2d\uff0c\u6211\u5011\u5efa\u7acb\u4e86\u4e00\u500b\u5c0f\u7bc4\u570d\u7684 XGBoost \u8d85\u53c3\u6578\u641c\u7d22\u7a7a\u9593\u3002\u5176\u6bcf\u4e00\u500b\u8d85\u53c3\u6578\u90fd\u6703\u6709\u4e00\u500b\u641c\u7d22\u7684\u7bc4\u570d\uff0c\u53ef\u4ee5\u4f7f\u7528 suggest_* \u65b9\u6cd5\u8a2d\u5b9a\u5340\u9593\u3002\u6b64\u65b9\u6cd5\u5fc5\u9808\u8f38\u5165\u8d85\u53c3\u6578\u7684\u540d\u7a31\uff0c\u4ee5\u53ca\u7d66\u4e88\u8a72\u53c3\u6578\u7684\u4e00\u7d44\u96a8\u6a5f\u7bc4\u570d\u5176\u578b\u614b\u6709\u5f88\u591a\u4f8b\u5982\uff1a suggest_int \u3001 suggest_discrete_uniform \u3001 suggest_float ...\u7b49\u3002\u66f4\u591a\u8a73\u7d30\u7684\u5167\u5bb9\u53ef\u4ee5\u5f9e \u5b98\u65b9\u6587\u4ef6 \u53d6\u5f97\u3002\u6216\u662f\u4e5f\u53ef\u4ee5\u53c3\u8003\u5b98\u65b9\u5728 GitHub \u4e0a\u5c0d\u65bc XGBoost \u7684\u4f7f\u7528\u7bc4\u4f8b\u3002 import optuna 
import xgboost as xgb from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split def objective ( trial , X = X , y = y ): \"\"\" A function to train a model using different hyperparamerters combinations provided by Optuna. \"\"\" X_train , X_valid , y_train , y_valid = train_test_split ( X , y , test_size = 0.4 ) params = { 'max_depth' : trial . suggest_int ( 'max_depth' , 6 , 15 ), \"subsample\" : trial . suggest_float ( \"subsample\" , 0.2 , 1.0 ), 'n_estimators' : trial . suggest_int ( 'n_estimators' , 500 , 2000 , 100 ), 'eta' : trial . suggest_float ( \"eta\" , 1e-8 , 1.0 , log = True ), 'alpha' : trial . suggest_float ( 'alpha' , 1e-8 , 1.0 , log = True ), 'lambda' : trial . suggest_float ( 'lambda' , 1e-8 , 1.0 , log = True ), 'gamma' : trial . suggest_float ( \"gamma\" , 1e-8 , 1.0 , log = True ), 'min_child_weight' : trial . suggest_int ( 'min_child_weight' , 2 , 10 ), 'grow_policy' : trial . suggest_categorical ( \"grow_policy\" , [ \"depthwise\" , \"lossguide\" ]), \"colsample_bytree\" : trial . suggest_float ( \"colsample_bytree\" , 0.2 , 1.0 ) } reg = xgb . XGBRegressor ( ** params ) reg . fit ( X_train , y_train , eval_set = [( X_valid , y_valid )], eval_metric = 'rmse' , verbose = False ) return mean_squared_error ( y_valid , reg . predict ( X_valid ), squared = False ) \u8a2d\u5b9a\u597d\u8abf\u53c3\u7684\u5340\u9593\u5f8c\uff0c\u5373\u53ef\u958b\u59cb\u56c9\u3002 %% time # Creating Optuna object and defining its parameters study = optuna . create_study ( direction = 'minimize' ) study . optimize ( objective , n_trials = 10 ) # Showing optimization results print ( 'Number of finished trials:' , len ( study . trials )) print ( 'Best trial parameters:' , study . best_trial . params ) print ( 'Best score:' , study . 
best_value ) Optuna \u9810\u8a2d\u7684\u8d85\u53c3\u6578\u641c\u5c0b\u65b9\u6cd5\u80fd\u6709\u6548\u5730\u5728\u77ed\u6642\u9593\u5167\u5f80\u6700\u4f73\u7684\u65b9\u5411\u53bb\u5c0b\u627e\u4e00\u7d44\u9069\u5408\u7684\u53c3\u6578\u3002\u8207 GridSearch \u76f8\u6bd4\u539f\u672c\u53ef\u80fd\u9700\u8981\u6578\u5c0f\u6642\u7684\u641c\u7d22\u7a7a\u9593\u5728\u77ed\u77ed\u7684\u5e7e\u5206\u9418\u5167\u5c31\u53ef\u4ee5\u7372\u5f97\u4e0d\u932f\u7684\u7d50\u679c\u3002\u4e26\u4e14\u6709\u6548\u7684\u964d\u4f4e loss\u3002\u9664\u4e86\u8ff4\u6b78\u554f\u984c Optuna \u4e5f\u80fd\u5c0d\u5206\u985e\u554f\u984c\u9032\u884c\u8d85\u53c3\u6578\u641c\u5c0b\uff0c\u5b98\u65b9\u7684 GitHub \u4e5f\u6709\u63d0\u4f9b\u5404\u7a2e\u4e0d\u540c\u6a5f\u5668\u5b78\u7fd2\u6846\u67b6\u7684\u5beb\u6cd5\u3002 Optuna \u5982\u4f55\u63a1\u6a23\u53c3\u6578\uff1f TPESampler \u70ba\u9810\u8a2d\u7684\u8d85\u53c3\u6578\u63a1\u6a23\u5668\u3002\u5b83\u8a66\u5716\u900f\u904e\u63d0\u9ad8\u6700\u5f8c\u4e00\u6b21\u8a66\u9a57\u7684\u5206\u6578\u4f86\u5c0d\u8d85\u53c3\u6578\u5019\u9078\u8005\u9032\u884c\u63a1\u6a23\u3002\u9664\u6b64\u4e4b\u5916 Optuna \u63d0\u4f9b\u4e86\u4ee5\u4e0b\u9019\u5e7e\u500b\u53c3\u6578\u63a1\u6a23\u7684\u65b9\u5f0f: - GridSampler : \u8207 Sklearn \u7684 GridSearch \u63a1\u6a23\u65b9\u5f0f\u76f8\u540c\u3002\u4f7f\u7528\u6b64\u65b9\u6cd5\u6642\u5efa\u8b70\u4e0d\u8981\u8a2d\u5b9a\u592a\u5927\u7684\u7bc4\u570d\u3002 - RandomSampler : \u8207 Sklearn \u7684 RandomizedSearchCV \u63a1\u6a23\u65b9\u5f0f\u76f8\u540c\u3002 - TPESampler : \u5168\u540d Tree-structured Parzen Estimator sampler\u3002\u9810\u8a2d\u63a1\u6a23\u65b9\u5f0f\u3002 - CmaEsSampler : \u57fa\u65bc CMA ES \u6f14\u7b97\u6cd5\u7684\u63a1\u6a23\u5668 (\u4e0d\u652f\u63f4\u985e\u5225\u578b\u7684\u8d85\u53c3\u6578). 
\u5982\u679c\u9700\u8981\u66ff\u63db\u63a1\u6a23\u53c3\u6578\u7684\u65b9\u5f0f\u53ef\u4ee5\u53c3\u8003\u4ee5\u4e0b\u7a0b\u5f0f\u3002 from optuna.samplers import CmaEsSampler , RandomSampler # Study with a random sampler study_1 = optuna . create_study ( sampler = RandomSampler ( seed = 42 )) # Study with a CMA ES sampler study_2 = optuna . create_study ( sampler = CmaEsSampler ( seed = 42 )) Optuna \u8996\u89ba\u5316\u5206\u6790 Optuna \u5728\u540c\u6642\u4e5f\u63d0\u4f9b\u4e86\u8996\u89ba\u5316\u7684\u5957\u4ef6: - plot_optimization_history (\u8996\u89ba\u5316\u512a\u5316\u7684\u904e\u7a0b) - plot_intermediate_values (\u8996\u89ba\u5316\u5b78\u7fd2\u7684\u66f2\u7dda) - plot_parallel_coordinate (\u8996\u89ba\u5316\u9ad8\u7dad\u5ea6\u4e2d\u53c3\u6578\u9593\u7684\u5f7c\u6b64\u95dc\u4fc2) - plot_contour (\u8996\u89ba\u5316\u53c3\u6578\u9593\u7684\u5f7c\u6b64\u95dc\u4fc2) - plot_slice (\u8996\u89ba\u5316\u500b\u5225\u53c3\u6578) - plot_param_importances (\u53c3\u6578\u5c0d\u6a21\u578b\u7684\u91cd\u8981\u7a0b\u5ea6) - plot_edf (\u8996\u89ba\u5316\u7d93\u9a57\u5206\u4f48\u51fd\u6578) \u5ef6\u7e8c\u4e0a\u9762\u7684\u7bc4\u4f8b\u6211\u5011\u4f86\u8996\u89ba\u5316\u5c55\u793a Optuna \u641c\u5c0b\u7684\u904e\u7a0b\u8207\u7d50\u679c\u3002\u9996\u5148\u6211\u5011\u4f86\u7e6a\u88fd study \u7684\u512a\u5316\u6b77\u53f2\u904e\u7a0b\u3002\u9019\u5f35\u5716\u544a\u8a34\u6211\u5011\uff0cOptuna \u53ea\u7d93\u904e\u5e7e\u6b21\u8a66\u9a57\u5c31\u4f7f\u5206\u6578\u6536\u6582\u5230\u6700\u5c0f\u503c\u3002 from optuna.visualization import plot_optimization_history plotly_config = { \"staticPlot\" : True } fig = plot_optimization_history ( study ) fig . 
show ( config = plotly_config ) \u63a5\u4e0b\u4f86\uff0c\u8b93\u6211\u5011\u7e6a\u88fd\u8d85\u53c3\u6578\u91cd\u8981\u6027\u3002\u5f9e\u9019\u5f35\u5716\u6211\u5011\u53ef\u4ee5\u767c\u73fe eta(learning_rate) \u5b78\u7fd2\u901f\u7387\u662f\u6700\u70ba\u91cd\u8981\u7684\u3002\u6b64\u5916 grow_policy \u8207 lambda \u5c0d\u6e1b\u5c11 loss \u4e0a\u7121\u592a\u5927\u5e6b\u52a9\u3002\u56e0\u6b64\u5728\u4e0b\u4e00\u6b21\u57f7\u884c\u8a66\u9a57\u7684\u6642\u5019\u53ef\u4ee5\u8003\u616e\u5c07\u7121\u7528\u7684\u53c3\u6578\u79fb\u9664\uff0c\u4e26\u5c07\u91cd\u8981\u7684\u8d85\u53c3\u6578\u7bc4\u570d\u52a0\u5927\u53d6\u5f97\u66f4\u597d\u7684\u641c\u7d22\u7d50\u679c\u3002\u5176\u4ed6\u7684\u4f7f\u7528\u65b9\u6cd5\u53ef\u4ee5 \u53c3\u8003 \u5b98\u65b9\u7684\u8aaa\u660e\u6587\u4ef6\u3002 from optuna.visualization import plot_param_importances fig = plot_param_importances ( study ) fig . show ( config = plotly_config ) \u5c0f\u7d50 \u4eca\u5929\u6211\u5011\u4ecb\u7d39\u4e86\u9019\u4e00\u500b\u8d85\u53c3\u6578\u6700\u4f73\u5316\u7684\u5de5\u5177\uff0c\u88e1\u9762\u6709\u592a\u591a\u529f\u80fd\u5c1a\u672a\u63d0\u5230\u3002\u4f8b\u5982\uff1a\u8a66\u9a57\u7684\u526a\u679d\uff0c\u7c21\u55ae\u4f86\u8aaa\u5c31\u662f\u8a2d\u5b9a\u8a66\u9a57\u7684\u4f8b\u5916\u689d\u4ef6\u7576\u4e0d\u6eff\u8db3\u9810\u5b9a\u689d\u4ef6\u5373\u4e0d\u57f7\u884c\u6b64\u6b21\u8a66\u9a57\u3002\u6216\u662f\u5132\u5b58\u6b77\u53f2\u6700\u4f73\u7684\u53c3\u6578\u5be6\u73fe\u5e73\u884c\u512a\u5316\u5de5\u4f5c\u3002\u9664\u6b64\u4e4b\u5916\u6b64\u5957\u4ef6\u9084\u652f\u63f4\u50cf\u662f SQLite \u7b49\u8cc7\u6599\u5eab\u53ef\u4ee5\u5132\u5b58\u6b77\u53f2\u641c\u5c0b\u7d50\u679c\u5feb\u901f\u7684\u9054\u5230\u6700\u4f73\u641c\u5c0b\u80fd\u529b\u3002\u91cd\u9ede\u6b64\u5957\u4ef6\u9084\u652f\u63f4\u795e\u7d93\u7db2\u8def\u7684\u53c3\u6578\u641c\u5c0b\u4ee5\u53ca\u7db2\u8def\u7684\u5bec\u5ea6\u6df1\u5ea6\u9078\u64c7\u3002\u5e38\u898b\u7684\u6df1\u5ea6\u5b78\u7fd2\u6846\u67b6\u90fd\u80fd\u652f\u63f4\u4f8b\u5982 
TensorFlow\u3001PyTorch\uff0cMXNet...\u7b49\u3002 Reference OPTUNA: A Flexible, Efficient and Scalable Hyperparameter Optimization Framework optuna.org \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 21] \u8abf\u6574\u6a21\u578b\u8d85\u53c3\u6578\u5229\u5668 - Optuna"},{"location":"21.Optuna/#day-21-optuna","text":"","title":"[Day 21] \u8abf\u6574\u6a21\u578b\u8d85\u53c3\u6578\u5229\u5668 - Optuna"},{"location":"21.Optuna/#_1","text":"Optuna \u5982\u4f55\u63a1\u6a23\u53c3\u6578\uff1f \u5be6\u4f5c Optuna \u641c\u5c0b\u6700\u4f73\u8d85\u53c3\u6578 \u4ee5 XGBoost \u8ff4\u6b78\u6a21\u578b\u65bc\u623f\u50f9\u9810\u6e2c\u70ba\u4f8b Optuna \u8996\u89ba\u5316\u5206\u6790\u641c\u5c0b\u7d50\u679c \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"21.Optuna/#_2","text":"\u4f60\u662f\u5426\u66fe\u7d93\u89ba\u5f97\u6a21\u578b\u6709\u592a\u591a\u7684\u8d85\u53c3\u6578\u800c\u611f\u5230\u53ad\u7169\u55ce\uff1f\u8981\u5f9e\u67d0\u4e00\u500b\u6f14\u7b97\u6cd5\u5f97\u5230\u597d\u7684\u89e3\u5fc5\u9808\u8981\u8abf\u6574\u8d85\u53c3\u6578\uff0c\u6240\u8b02\u7684\u8d85\u53c3\u6578\u5c31\u662f\u63a7\u5236\u8a13\u7df4\u6a21\u578b\u7684\u4e00\u7d44\u795e\u79d8\u6578\u5b57\uff0c\u4f8b\u5982\u5b78\u7fd2\u901f\u7387\u5c31\u662f\u4e00\u7a2e\u8d85\u53c3\u6578\u3002\u4f60\u6c38\u9060\u90fd\u4e0d\u77e5\u9053 0~1 \u4e4b\u9593\u54ea\u4e00\u500b\u6578\u5b57\u662f\u6700\u9069\u5408\u7684\uff0c\u552f\u4e00\u7684\u65b9\u6cd5\u5c31\u662f\u8a66\u932f (trial and error)\u3002\u90a3\u842c\u4e00\u6a21\u578b\u6709\u591a\u500b\u8d85\u53c3\u6578\u53ef\u4ee5\u63a7\u5236\uff0c\u8c48\u4e0d\u662f\u5c31\u6709\u6210\u5343\u4e0a\u842c\u7a2e\u7d44\u5408\u8981\u6162\u6162\u5617\u8a66\u55ce\uff1f\u5982\u679c\u4f60\u6709\u4e5f\u9019\u500b\u554f\u984c\uff0c\u770b\u9019\u7bc7\u5c31\u5c0d\u4e86\uff01\u96d6\u7136\u4f60\u53ef\u80fd\u807d\u904e Sklearn \u7684 GridSearchCV 
\u540c\u6a23\u4e5f\u662f\u66b4\u529b\u7684\u627e\u51fa\u6700\u4f73\u53c3\u6578\uff0c\u6216\u662f\u4f7f\u7528 RandomizedSearchCV \u6307\u5b9a\u8d85\u53c3\u6578\u7684\u7bc4\u570d\u4e26\u96a8\u6a5f\u7684\u62bd\u53d6\u53c3\u6578\u9032\u884c\u8a13\u7df4\uff0c\u5176\u5b83\u5011\u7684\u5171\u540c\u7f3a\u9ede\u662f\u975e\u5e38\u8017\u6642\u8207\u4f54\u7528\u6a5f\u5668\u8cc7\u6e90\u3002\u9019\u88e1\u6211\u5011\u8981\u4f86\u4ecb\u7d39 Optuna \u9019\u500b\u81ea\u52d5\u627e\u8d85\u53c3\u6578\u7684\u65b9\u4fbf\u5de5\u5177\uff0c\u4e26\u4e14\u53ef\u4ee5\u548c\u591a\u500b\u5e38\u7528\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u6574\u5408\u3002Optuna \u900f\u904e\u8abf\u6574\u9069\u7576\u7684\u8d85\u53c3\u6578\u4f86\u63d0\u9ad8\u6a21\u578b\u9810\u6e2c\u80fd\u529b\uff0c\u6b64\u5c08\u6848\u6700\u521d\u65bc 2019 \u767c\u8868\u65bc arxiv \u7684\u4e00\u7bc7\u8ad6\u6587 Optuna: A Next-generation Hyperparameter Optimization Framework \u540c\u6642\u958b\u6e90\u5728 GitHub \u4e0a\u514d\u8cbb\u63d0\u4f9b\u5927\u5bb6\u4f7f\u7528\u3002\u540c\u6642 Optuna \u4e5f\u662f 2021 \u5e74 Kaggle \u8cc7\u6599\u79d1\u5b78\u7af6\u8cfd\u4e2d\u6700\u5e38\u898b\u7684\u6a21\u578b\u8abf\u53c3\u5de5\u5177\u3002\u90a3\u662f\u4ec0\u9ebc\u539f\u56e0\u8b93 Optuna \u53d7\u5230\u5ee3\u5927\u7684\u6a5f\u5668\u5b78\u7fd2\u793e\u7fa4\u5982\u6b64\u7684\u6b61\u8fce\u5462\uff1f\u5c31\u8b93\u6211\u5011\u4f86\u770b\u770b\u4ed6\u662f\u5982\u6b64\u5730\u5f37\u5927\u5427\uff01","title":"\u524d\u8a00"},{"location":"21.Optuna/#optuna","text":"Optuna \u662f\u4e00\u500b\u5c08\u70ba\u6a5f\u5668\u5b78\u7fd2\u8a2d\u8a08\u7684\u81ea\u52d5\u8d85\u53c3\u6578\u512a\u5316\u7684\u6846\u67b6\u3002\u5176\u6700\u7a81\u51fa\u7684\u7279\u9ede\u662f\uff1a \u4eba\u6027\u5316\u7684\u5b9a\u7fa9\u641c\u7d22\u7a7a\u9593\u3002 \u652f\u63f4\u5927\u591a\u6578 ML \u8207 DL \u7684\u5b78\u7fd2\u5957\u4ef6\u3002\u4f8b\u5982: Sklearn\u3001PyTorch\u3001TensorFlow, XGBoost\u3001LightGBM\u3001 CatBoost...\u7b49\u3002 
\u5c0d\u641c\u7d22\u7d50\u679c\u63d0\u4f9b\u53ef\u89e3\u91cb\u6027(XAI)\u3002 \u5132\u5b58\u6b77\u53f2\u6700\u4f73\u7684\u53c3\u6578\u5be6\u73fe\u5e73\u884c\u512a\u5316\u5de5\u4f5c\u3002 \u6c7a\u5b9a\u4e26\u7d42\u6b62\u4e0d\u6eff\u8db3\u9810\u5b9a\u7fa9\u689d\u4ef6\u7684\u8a66\u9a57\u3002","title":"\u95dc\u65bc Optuna"},{"location":"21.Optuna/#optuna_1","text":"\u9019\u88e1\u6211\u5011\u8a2d\u5b9a\u4e00\u500b\u7c21\u55ae\u7684\u76ee\u6a19\u51fd\u5f0f $(x1+2)^2 + (x2-4)^2$\u3002\u6211\u5011\u90fd\u77e5\u9053\u7576\u9019\u500b\u5f0f\u5b50 x1=-2, x2=4 \u6642\u5c07\u6703\u6709\u6975\u5c0f\u503c 0\u3002\u56e0\u6b64\u6211\u5011\u5c31\u7528\u9019\u500b\u7c21\u55ae\u7684\u4f8b\u5b50\u900f\u904e Optuna \u627e\u51fa\u9019\u500b\u51fd\u5f0f\u4e2d\u6975\u5c0f\u503c\u6240\u5c0d\u61c9\u7684 x1 \u8207 x2 \u5427\u3002 import optuna def objective ( trial ): x1 = trial . suggest_float ( \"x1\" , - 5 , 5 ) x2 = trial . suggest_float ( \"x2\" , - 5 , 5 ) return ( x1 + 2 ) ** 2 + ( x2 - 4 ) ** 2 \u9996\u5148\u8f09\u5165 optuna \u5957\u4ef6\uff0c\u5982\u679c\u5c1a\u672a\u5b89\u88dd\u6b64\u5957\u4ef6\u7684\u8b80\u8005\u53ef\u4ee5\u53c3\u8003\u4ee5\u4e0b\u6307\u4ee4\u9032\u884c\u5b89\u88dd\uff1a pip install optuna \u63a5\u8457\u6211\u5011\u4f86\u5b9a\u7fa9\u4e00\u500b\u627e\u51fa\u6975\u5c0f\u503c\u7684\u76ee\u6a19\u51fd\u5f0f objective() \u3002\u5728\u9019\u500b\u51fd\u5f0f\u4e2d\u6211\u5011\u5c07\u8981\u8a2d\u5b9a optuna \u53ef\u4ee5\u53bb\u5c0b\u627e\u7684\u4e00\u53c3\u6578\uff0c\u4e5f\u5c31\u662f x1 \u8207 x2\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e optuna \u6240\u63d0\u4f9b\u7684 trial \u7269\u4ef6\u4f86\u70ba\u6211\u5011\u7684\u8d85\u53c3\u6578\u8a2d\u5b9a\u4e00\u7d44\u7bc4\u570d\u3002\u5176\u4e2d\u5b83\u6709\u4e00\u500b suggest_float \u65b9\u6cd5\uff0c\u8a72\u65b9\u6cd5\u63a1\u7528\u8d85\u53c3\u6578\u7684\u540d\u7a31\u548c\u7bc4\u570d\u4f86\u5c0b\u627e\u5176\u6700\u4f73\u503c\u3002\u6211\u5011\u4ee5 x1 \u4f86\u8209\u4f8b\uff1a x1 = trial.suggest_float(\"x1\", -5, 5) 
\u4e0a\u9762\u9019\u4e00\u6bb5\u7a0b\u5f0f\u5728 GridSearch \u4e2d\u53ef\u4ee5\u8868\u793a\u6210 {\"x1\": np.arange(-5, 5, .1)} \u3002\u5373\u8868\u793a\u641c\u5c0b\u904e\u7a0b\u4e2d\u6211\u5011\u6703\u5f9e x1 \u96a8\u6a5f\u8a2d\u5b9a -5~5 \u4e4b\u9593\u7684\u4efb\u4e00\u6d6e\u9ede\u6578\u3002\u8a2d\u5b9a\u5b8c\u51fd\u5f0f\u5f8c\u5c31\u53ef\u4ee5\u958b\u59cb\u512a\u5316\u4e86\uff0c\u6211\u5011\u5f9e optuna \u5efa\u7acb\u4e00\u500b study \u7269\u4ef6\uff0c\u4e26\u5c07 objective \u51fd\u6578\u50b3\u905e\u7d66 study \u7684 optimize \u65b9\u6cd5\u3002\u7531\u65bc\u6211\u5011\u7684\u76ee\u6a19\u662f\u8981\u627e\u51fa\u51fd\u5f0f\u4e2d\u7684\u6975\u5c0f\u503c\uff0c\u56e0\u6b64 direction \u8a2d\u70ba minimize \u3002\u53e6\u5916\u5728 optimize \u65b9\u6cd5\u4e2d\u6211\u5011\u4e5f\u53ef\u4ee5\u8a2d\u5b9a\u8a66\u9a57\u7684\u6b21\u6578(n_trials)\u6216\u6642\u9593(timeout)\u3002\u4e00\u5207\u5c31\u7dd2\u5f8c\u5373\u53ef\u958b\u59cb\u57f7\u884c\uff01\u4ee5\u4e0b\u7bc4\u4f8b\u662f\u8fed\u4ee350\u6b21\u4e26\u5f9e\u4e2d\u627e\u5230\u4e00\u7d44\u6700\u4f73\u7684 x1 \u8207 x2 \u4f7f\u5176\u76ee\u6a19\u51fd\u5f0f\u53ef\u4ee5\u6700\u5c0f\u5316\u3002\u8dd1\u5b8c 50 \u6b21\u5f8c\u6211\u5011\u53ef\u4ee5\u7d93\u7531 study \u8b8a\u6578\u4e2d\u5f97\u5230\u4e00\u7d44\u6700\u4f73\u7684\u89e3\u3002\u8a66\u9a57\u7d50\u675f\u5f8c\u6211\u5011\u53ef\u4ee5\u767c\u73fe x1 \u8da8\u8fd1\u65bc -2 \u548c x2 \u8da8\u8fd1\u65bc 4\u3002 %% time # Creating Optuna object and defining its parameters study = optuna . create_study ( direction = 'minimize' ) study . optimize ( objective , n_trials = 50 ) # Showing optimization results print ( 'Number of finished trials:' , len ( study . trials )) print ( 'Best trial parameters:' , study . best_trial . params ) print ( 'Best score:' , study . 
best_value ) \u8f38\u51fa\u7d50\u679c\uff1a Number of finished trials: 50 Best trial parameters: {'x1': -1.8154924755761588, 'x2': 3.9141985823539844} Best score: 0.04140490983908035 CPU times: user 432 ms, sys: 46.3 ms, total: 478 ms Wall time: 431 ms \u7531\u4e0a\u8ff0\u7684\u7c21\u55ae\u4f8b\u5b50\u6211\u5011\u53ef\u4ee5\u77e5\u9053\u5efa\u7acb\u4e00\u500b optuna \u6700\u4f73\u5316\u6d41\u7a0b\u50c5\u9700\u8981\u4e09\u6b65\u9a5f\uff1a 1. \u5efa\u7acb objective \u51fd\u5f0f\u8207\u8a2d\u5b9a trial\uff0c\u4e26\u56de\u50b3 loss\u3002 2. \u5efa\u7acb create_study() \u7269\u4ef6\u3002 3. \u4f7f\u7528 optimize() \u57f7\u884c\u641c\u5c0b\u3002","title":"Optuna \u7c21\u55ae\u7bc4\u4f8b"},{"location":"21.Optuna/#end-to-end-example-with-xgboost","text":"\u6211\u5011\u4ee5 Sklearn \u6240\u63d0\u4f9b\u7684\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u593e\u4f86\u505a\u7bc4\u4f8b\u3002\u6b64\u8cc7\u6599\u96c6\u5171\u6709 506 \u7b46\u8cc7\u6599\uff0c\u5176\u4e2d\u8f38\u5165\u7279\u5fb5\u6709 13 \u500b\u5176\u8f38\u51fa\u70ba\u9810\u6e2c\u8a72\u7b46\u8cc7\u6599\u7684\u623f\u50f9\u3002\u7531\u65bc\u60f3\u8981\u5feb\u901f\u793a\u7bc4\u5982\u4f55\u4f7f\u7528 optuna\uff0c\u56e0\u6b64\u9019\u88e1\u5c31\u4e0d\u505a\u4efb\u4f55\u8cc7\u6599 EDA \u8207\u524d\u8655\u7406\u3002 from sklearn.datasets import load_boston X , y = load_boston ( return_X_y = True ) print ( 'X:' , X . shape ) print ( 'y:' , y . 
shape ) \u8f38\u51fa\u7d50\u679c\uff1a X: (506, 13) y: (506,) \u8cc7\u6599\u96c6\u6210\u529f\u88ab\u8f09\u5165\u5f8c\u6211\u5011\u5c31\u53ef\u4ee5\u5efa\u7acb\u4e00\u500b objective \u51fd\u5f0f\u3002\u5728\u9019\u500b\u76ee\u6a19\u51fd\u5f0f\u4e2d\uff0c\u6211\u5011\u5efa\u7acb\u4e86\u4e00\u500b\u5c0f\u7bc4\u570d\u7684\u7684 XGBoost \u8d85\u53c3\u6578\u641c\u7d22\u7a7a\u9593\u3002\u5176\u6bcf\u4e00\u500b\u8d85\u53c3\u6578\u90fd\u6703\u6709\u4e00\u500b\u641c\u7d22\u7684\u7bc4\u570d\uff0c\u53ef\u4ee5\u4f7f\u7528 suggest_* \u65b9\u6cd5\u8a2d\u5b9a\u5340\u9593\u3002\u6b64\u65b9\u6cd5\u5fc5\u9808\u8f38\u5165\u8d85\u53c3\u6578\u7684\u540d\u7a31\uff0c\u4ee5\u53ca\u7d66\u4e88\u8a72\u53c3\u6578\u7684\u4e00\u7d44\u96a8\u6a5f\u7bc4\u570d\u5176\u578b\u614b\u6709\u5f88\u591a\u4f8b\u5982\uff1a suggest_int \u3001 suggest_discrete_uniform \u3001 suggest_float ...\u7b49\u3002\u66f4\u591a\u8a73\u7d30\u7684\u5167\u5bb9\u53ef\u4ee5\u5f9e \u5b98\u65b9\u6587\u4ef6 \u53d6\u5f97\u3002\u6216\u662f\u4e5f\u53ef\u4ee5\u53c3\u8003\u5b98\u65b9\u5728 GitHub \u4e0a\u5c0d\u65bc XGBoost \u7684\u4f7f\u7528\u7bc4\u4f8b\u3002 import optuna import xgboost as xgb from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split def objective ( trial , X = X , y = y ): \"\"\" A function to train a model using different hyperparamerters combinations provided by Optuna. \"\"\" X_train , X_valid , y_train , y_valid = train_test_split ( X , y , test_size = 0.4 ) params = { 'max_depth' : trial . suggest_int ( 'max_depth' , 6 , 15 ), \"subsample\" : trial . suggest_float ( \"subsample\" , 0.2 , 1.0 ), 'n_estimators' : trial . suggest_int ( 'n_estimators' , 500 , 2000 , 100 ), 'eta' : trial . suggest_float ( \"eta\" , 1e-8 , 1.0 , log = True ), 'alpha' : trial . suggest_float ( 'alpha' , 1e-8 , 1.0 , log = True ), 'lambda' : trial . suggest_float ( 'lambda' , 1e-8 , 1.0 , log = True ), 'gamma' : trial . 
suggest_float ( \"gamma\" , 1e-8 , 1.0 , log = True ), 'min_child_weight' : trial . suggest_int ( 'min_child_weight' , 2 , 10 ), 'grow_policy' : trial . suggest_categorical ( \"grow_policy\" , [ \"depthwise\" , \"lossguide\" ]), \"colsample_bytree\" : trial . suggest_float ( \"colsample_bytree\" , 0.2 , 1.0 ) } reg = xgb . XGBRegressor ( ** params ) reg . fit ( X_train , y_train , eval_set = [( X_valid , y_valid )], eval_metric = 'rmse' , verbose = False ) return mean_squared_error ( y_valid , reg . predict ( X_valid ), squared = False ) \u8a2d\u5b9a\u597d\u8abf\u53c3\u7684\u5340\u9593\u5f8c\uff0c\u5373\u53ef\u958b\u59cb\u56c9\u3002 %% time # Creating Optuna object and defining its parameters study = optuna . create_study ( direction = 'minimize' ) study . optimize ( objective , n_trials = 10 ) # Showing optimization results print ( 'Number of finished trials:' , len ( study . trials )) print ( 'Best trial parameters:' , study . best_trial . params ) print ( 'Best score:' , study . best_value ) Optuna \u9810\u8a2d\u7684\u8d85\u53c3\u6578\u641c\u5c0b\u65b9\u6cd5\u80fd\u6709\u6548\u5730\u5728\u77ed\u6642\u9593\u5167\u5f80\u6700\u4f73\u7684\u65b9\u5411\u53bb\u5c0b\u627e\u4e00\u7d44\u9069\u5408\u7684\u53c3\u6578\u3002\u8207 GridSearch \u76f8\u6bd4\u539f\u672c\u53ef\u80fd\u9700\u8981\u6578\u5c0f\u6642\u7684\u641c\u7d22\u7a7a\u9593\u5728\u77ed\u77ed\u7684\u5e7e\u5206\u9418\u5167\u5c31\u53ef\u4ee5\u7372\u5f97\u4e0d\u932f\u7684\u7d50\u679c\u3002\u4e26\u4e14\u6709\u6548\u7684\u964d\u4f4e loss\u3002\u9664\u4e86\u8ff4\u6b78\u554f\u984c Optuna \u4e5f\u80fd\u5c0d\u5206\u985e\u554f\u984c\u9032\u884c\u8d85\u53c3\u6578\u641c\u5c0b\uff0c\u5b98\u65b9\u7684 GitHub \u4e5f\u6709\u63d0\u4f9b\u5404\u7a2e\u4e0d\u540c\u6a5f\u5668\u5b78\u7fd2\u6846\u67b6\u7684\u5beb\u6cd5\u3002","title":"End-to-end example with XGBoost"},{"location":"21.Optuna/#optuna_2","text":"TPESampler 
\u70ba\u9810\u8a2d\u7684\u8d85\u53c3\u6578\u63a1\u6a23\u5668\u3002\u5b83\u8a66\u5716\u900f\u904e\u63d0\u9ad8\u6700\u5f8c\u4e00\u6b21\u8a66\u9a57\u7684\u5206\u6578\u4f86\u5c0d\u8d85\u53c3\u6578\u5019\u9078\u8005\u9032\u884c\u63a1\u6a23\u3002\u9664\u6b64\u4e4b\u5916 Optuna \u63d0\u4f9b\u4e86\u4ee5\u4e0b\u9019\u5e7e\u500b\u53c3\u6578\u63a1\u6a23\u7684\u65b9\u5f0f: - GridSampler : \u8207 Sklearn \u7684 GridSearch \u63a1\u6a23\u65b9\u5f0f\u76f8\u540c\u3002\u4f7f\u7528\u6b64\u65b9\u6cd5\u6642\u5efa\u8b70\u4e0d\u8981\u8a2d\u5b9a\u592a\u5927\u7684\u7bc4\u570d\u3002 - RandomSampler : \u8207 Sklearn \u7684 RandomizedSearchCV \u63a1\u6a23\u65b9\u5f0f\u76f8\u540c\u3002 - TPESampler : \u5168\u540d Tree-structured Parzen Estimator sampler\u3002\u9810\u8a2d\u63a1\u6a23\u65b9\u5f0f\u3002 - CmaEsSampler : \u57fa\u65bc CMA ES \u6f14\u7b97\u6cd5\u7684\u63a1\u6a23\u5668 (\u4e0d\u652f\u63f4\u985e\u5225\u578b\u7684\u8d85\u53c3\u6578). \u5982\u679c\u9700\u8981\u66ff\u63db\u63a1\u6a23\u53c3\u6578\u7684\u65b9\u5f0f\u53ef\u4ee5\u53c3\u8003\u4ee5\u4e0b\u7a0b\u5f0f\u3002 from optuna.samplers import CmaEsSampler , RandomSampler # Study with a random sampler study_1 = optuna . create_study ( sampler = RandomSampler ( seed = 42 )) # Study with a CMA ES sampler study_2 = optuna . 
create_study ( sampler = CmaEsSampler ( seed = 42 ))","title":"Optuna \u5982\u4f55\u63a1\u6a23\u53c3\u6578\uff1f"},{"location":"21.Optuna/#optuna_3","text":"Optuna \u5728\u540c\u6642\u4e5f\u63d0\u4f9b\u4e86\u8996\u89ba\u5316\u7684\u5957\u4ef6: - plot_optimization_history (\u8996\u89ba\u5316\u512a\u5316\u7684\u904e\u7a0b) - plot_intermediate_values (\u8996\u89ba\u5316\u5b78\u7fd2\u7684\u66f2\u7dda) - plot_parallel_coordinate (\u8996\u89ba\u5316\u9ad8\u7dad\u5ea6\u4e2d\u53c3\u6578\u9593\u7684\u5f7c\u6b64\u95dc\u4fc2) - plot_contour (\u8996\u89ba\u5316\u53c3\u6578\u9593\u7684\u5f7c\u6b64\u95dc\u4fc2) - plot_slice (\u8996\u89ba\u5316\u500b\u5225\u53c3\u6578) - plot_param_importances (\u53c3\u6578\u5c0d\u6a21\u578b\u7684\u91cd\u8981\u7a0b\u5ea6) - plot_edf (\u8996\u89ba\u5316\u7d93\u9a57\u5206\u4f48\u51fd\u6578) \u5ef6\u7e8c\u4e0a\u9762\u7684\u7bc4\u4f8b\u6211\u5011\u4f86\u8996\u89ba\u5316\u5c55\u793a Optuna \u641c\u5c0b\u7684\u904e\u7a0b\u8207\u7d50\u679c\u3002\u9996\u5148\u6211\u5011\u4f86\u7e6a\u88fd study \u7684\u512a\u5316\u6b77\u53f2\u904e\u7a0b\u3002\u9019\u5f35\u5716\u544a\u8a34\u6211\u5011\uff0cOptuna \u53ea\u7d93\u904e\u5e7e\u6b21\u8a66\u9a57\u5c31\u4f7f\u5206\u6578\u6536\u6582\u5230\u6700\u5c0f\u503c\u3002 from optuna.visualization import plot_optimization_history plotly_config = { \"staticPlot\" : True } fig = plot_optimization_history ( study ) fig . 
show ( config = plotly_config ) \u63a5\u4e0b\u4f86\uff0c\u8b93\u6211\u5011\u7e6a\u88fd\u8d85\u53c3\u6578\u91cd\u8981\u6027\u3002\u5f9e\u9019\u5f35\u5716\u6211\u5011\u53ef\u4ee5\u767c\u73fe eta(learning_rate) \u5b78\u7fd2\u901f\u7387\u662f\u6700\u70ba\u91cd\u8981\u7684\u3002\u6b64\u5916 grow_policy \u8207 lambda \u5c0d\u6e1b\u5c11 loss \u4e0a\u7121\u592a\u5927\u5e6b\u52a9\u3002\u56e0\u6b64\u5728\u4e0b\u4e00\u6b21\u57f7\u884c\u8a66\u9a57\u7684\u6642\u5019\u53ef\u4ee5\u8003\u616e\u5c07\u7121\u7528\u7684\u53c3\u6578\u79fb\u9664\uff0c\u4e26\u5c07\u91cd\u8981\u7684\u8d85\u53c3\u6578\u7bc4\u570d\u52a0\u5927\u53d6\u5f97\u66f4\u597d\u7684\u641c\u7d22\u7d50\u679c\u3002\u5176\u4ed6\u7684\u4f7f\u7528\u65b9\u6cd5\u53ef\u4ee5 \u53c3\u8003 \u5b98\u65b9\u7684\u8aaa\u660e\u6587\u4ef6\u3002 from optuna.visualization import plot_param_importances fig = plot_param_importances ( study ) fig . show ( config = plotly_config )","title":"Optuna \u8996\u89ba\u5316\u5206\u6790"},{"location":"21.Optuna/#_3","text":"\u4eca\u5929\u6211\u5011\u4ecb\u7d39\u4e86\u9019\u4e00\u500b\u8d85\u53c3\u6578\u6700\u4f73\u5316\u7684\u5de5\u5177\uff0c\u88e1\u9762\u6709\u592a\u591a\u529f\u80fd\u5c1a\u672a\u63d0\u5230\u3002\u4f8b\u5982\uff1a\u8a66\u9a57\u7684\u526a\u679d\uff0c\u7c21\u55ae\u4f86\u8aaa\u5c31\u662f\u8a2d\u5b9a\u8a66\u9a57\u7684\u4f8b\u5916\u689d\u4ef6\u7576\u4e0d\u6eff\u8db3\u9810\u5b9a\u689d\u4ef6\u5373\u4e0d\u57f7\u884c\u6b64\u6b21\u8a66\u9a57\u3002\u6216\u662f\u5132\u5b58\u6b77\u53f2\u6700\u4f73\u7684\u53c3\u6578\u5be6\u73fe\u5e73\u884c\u512a\u5316\u5de5\u4f5c\u3002\u9664\u6b64\u4e4b\u5916\u6b64\u5957\u4ef6\u9084\u652f\u63f4\u50cf\u662f SQLite 
\u7b49\u8cc7\u6599\u5eab\u53ef\u4ee5\u5132\u5b58\u6b77\u53f2\u641c\u5c0b\u7d50\u679c\u5feb\u901f\u7684\u9054\u5230\u6700\u4f73\u641c\u5c0b\u80fd\u529b\u3002\u91cd\u9ede\u6b64\u5957\u4ef6\u9084\u652f\u63f4\u795e\u7d93\u7db2\u8def\u7684\u53c3\u6578\u641c\u5c0b\u4ee5\u53ca\u7db2\u8def\u7684\u5bec\u5ea6\u6df1\u5ea6\u9078\u64c7\u3002\u5e38\u898b\u7684\u6df1\u5ea6\u5b78\u7fd2\u6846\u67b6\u90fd\u80fd\u652f\u63f4\u4f8b\u5982 TensorFlow\u3001PyTorch\uff0cMXNet...\u7b49\u3002","title":"\u5c0f\u7d50"},{"location":"21.Optuna/#reference","text":"OPTUNA: A Flexible, Efficient and Scalable Hyperparameter Optimization Framework optuna.org \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"22.Plotly-Express/","text":"[Day 22] Python \u8996\u89ba\u5316\u89e3\u91cb\u6578\u64da - Plotly Express \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u5b89\u88dd plotly \u624b\u628a\u624b\u5be6\u4f5c\u8996\u89ba\u5316\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6 \u76f4\u65b9\u5716 \u7279\u5fb5\u95dc\u806f\u5ea6\u5206\u6790 \u6563\u4f48\u5716 \u7bb1\u5f62\u5716 \u8907\u5408\u578b\u8996\u89ba\u5316\u6280\u5de7 \u532f\u51fa\u5716\u7247 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 Plotly Express \u662f\u4e00\u500b\u9ad8\u7cbe\u7dfb\u7684\u8cc7\u6599\u8996\u89ba\u5316\u5957\u4ef6\u3002\u521d\u5b78\u6a5f\u5668\u5b78\u7fd2\u7684\u4f60\u4e00\u5b9a\u78b0\u904e\u50cf\u662f matplotlib \u548c seaborn \u9019\u985e\u578b\u7684\u5716\u8868\u5316\u5957\u4ef6\uff0c\u4e0d\u904e\u4f7f\u7528\u904e Plotly Express \u6703\u8b93\u4f60\u5c0d\u65bc\u8cc7\u6599\u8996\u89ba\u5316\u6709\u66f4\u4e0d\u4e00\u6a23\u7684\u9ad4\u9a57\u3002\u5b83\u7684\u529f\u80fd\u4f7f\u7528\u8d77\u4f86\u975e\u5e38\u76f4\u89c0\uff0c\u4e26\u4e14\u53ef\u4ee5\u5f88\u597d\u5730\u8207 Pandas DataFrame \u914d\u5408\u4f7f\u7528\u3002 Plotly Express \u65bc 2019 \u5e74\u7531\u52a0\u62ff\u5927 Plotly 
\u9019\u9593\u516c\u53f8\u91cb\u51fa\u4e86\u7b2c\u4e00\u7248\u9ad8\u968e\u7684 Python \u8cc7\u6599\u8996\u89ba\u5316\u5957\u4ef6\u3002 \u5b89\u88dd plotly \u82e5\u5c1a\u672a\u5b89\u88dd\u6b64\u5957\u4ef6\u7684\u8b80\u8005\uff0c\u53ef\u4ee5\u958b\u555f\u7d42\u7aef\u6a5f\u8f38\u5165\u4ee5\u4e0b\u6307\u4ee4\u9032\u884c\u5b89\u88dd\uff1a pip install plotly 1) \u8f09\u5165\u8cc7\u6599\u96c6 \u5728\u4eca\u5929\u7684\u7bc4\u4f8b\u4e2d\u6211\u5011\u4e00\u6a23\u63a1\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u4f86\u505a\u793a\u7bc4\uff0c\u8b93\u5927\u5bb6\u77a7\u77a7 Plotly Express \u662f\u5982\u512a\u96c5\u7684\u8655\u7406\u8cc7\u6599\u8996\u89ba\u5316\u3002 import plotly.express as px import plotly.graph_objects as go from IPython.display import HTML df_data = px . data . iris () df_data \u76f4\u65b9\u5716 \u70ba\u4e86\u66f4\u6e05\u695a\u4e86\u89e3\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u6cc1\uff0c\u6211\u5011\u53ef\u4ee5\u63a1\u7528\u76f4\u65b9\u5716 histogram \u505a\u66f4\u8fd1\u4e00\u6b65\u7684\u5206\u6790\u3002\u5f9e\u76f4\u65b9\u5716\u6211\u5011\u53ef\u4ee5\u66f4\u6e05\u695a\u77e5\u9053\u7279\u5fb5\u7684\u6bcf\u500b\u503c\u7684\u983b\u7387\u5206\u4f48\u3002\u7531\u65bc\u76ee\u524d\u7248\u672c\u5728 Notebook \u7121\u6cd5\u76f4\u63a5\u4f7f\u7528 fig.show() \u986f\u793a\u4e92\u52d5\u5716\uff0c\u5fc5\u9808\u5b89\u88dd\u4e00\u4e9b\u5c0f\u63d2\u4ef6\u6a21\u7d44\u8207\u8a2d\u5b9a\u3002\u56e0\u6b64\u7bc4\u4f8b\u4e2d\u63a1\u7528\u6700\u7c21\u55ae\u65b9\u6cd5\uff0c\u5148\u8f49\u63db\u6210 HTML code \u4e26\u900f\u904e IPython.display \u4e2d\u7684 HTML \u65b9\u6cd5\u986f\u793a\u51fa\u4f86\u3002 fig = px . histogram ( df_data , x = \"sepal_width\" ) HTML ( fig . 
to_html ()) \u9664\u6b64\u4e4b\u5916\u6211\u5011\u4e5f\u80fd\u89c0\u5bdf\u6bcf\u4e00\u500b\u7368\u7acb\u7279\u5fb5\u5c0d\u65bc\u82b1\u7684\u54c1\u7a2e\u7684\u6bcf\u500b\u5206\u5e03\u72c0\u6cc1\u3002\u6bcf\u500b\u4e0d\u540c\u7684\u984f\u8272\u4ee3\u8868\u4e0d\u540c\u7684\u82b1\u6735\u54c1\u7a2e\uff0c\u6211\u5011\u53ef\u4ee5\u85c9\u7531\u53c3\u6578\u8a2d\u5b9a\u6bcf\u500b\u76f4\u65b9\u5716\u662f\u5426\u91cd\u758a\uff0c\u4ee5\u53ca\u91cd\u758a\u7684\u900f\u660e\u7a0b\u5ea6\u3002 fig = px . histogram ( df_data , x = \"sepal_width\" , color = \"species\" ) fig . update_layout ( barmode = 'overlay' ) fig . update_traces ( opacity = 0.75 ) HTML ( fig . to_html ()) \u63a5\u4e0b\u4f86\u4e00\u6a23\u900f\u904e\u76f4\u65b9\u5716\u65b9\u5f0f\u4f86\u89c0\u5bdf\u6bcf\u500b\u82b1\u6735\u54c1\u7a2e\u7684\u6578\u91cf\u3002\u5f9e\u8996\u89ba\u5316\u53ef\u4ee5\u5f88\u6e05\u695a\u5f97\u77e5\u8a72\u8cc7\u6599\u96c6\u662f\u5426\u662f\u4e00\u500b\u5e73\u7a69\u7684\u8cc7\u6599\u96c6\u3002 fig = px . histogram ( df_data , x = 'species' , y = 'sepal_width' , histfunc = 'count' , height = 300 , title = 'Histogram Chart' ) HTML ( fig . to_html ()) \u7279\u5fb5\u95dc\u806f\u5ea6\u5206\u6790 \u6211\u5011\u53ef\u4ee5\u63a1\u7528 scatter_matrix \u70ba\u6bcf\u4e00\u500b\u7279\u5fb5\u5f7c\u6b64\u9593\u505a\u4e00\u500b\u95dc\u806f\u5ea6\u5206\u6790\u3002\u900f\u904e\u9019\u7a2e\u8996\u89ba\u5316\u65b9\u5f0f\u6211\u5011\u53ef\u4ee5\u5f88\u6e05\u695a\u7684\u77e5\u9053\u5169\u500b\u7279\u5fb5\u9593\u662f\u5426\u6b63\u76f8\u95dc\u8207\u8ca0\u76f8\u95dc\u3002 fig = px . scatter_matrix ( df_data , dimensions = [ \"sepal_width\" , \"sepal_length\" , \"petal_width\" , \"petal_length\" ], color = \"species\" ) HTML ( fig . 
to_html ()) \u6563\u4f48\u5716 \u6211\u5011\u4f7f\u7528\u6563\u4f48\u5716\u5c07\u82b1\u843c\u7684\u9577\u5ea6\u8207\u5bec\u5ea6\u986f\u793a\u5728\u4e8c\u7dad\u5750\u6a19\u5e73\u9762\u4e0a\u3002\u4f7f\u7528 Plotly Express \u5957\u4ef6\u4e2d\u7684 scatter \u65b9\u6cd5\uff0c\u6211\u5011\u53ef\u4ee5\u8f15\u9b06\u69cb\u5efa\u5716\u5f62\uff0c\u4e26\u653e\u5165 DataFrame \u683c\u5f0f\u7684\u8cc7\u6599\u4e26\u6307\u5b9a\u5fc5\u8981\u53c3\u6578 x \u8ef8\u4e2d\u7684\u8b8a\u6578\u548c y \u8ef8\u4e2d\u7684\u8b8a\u6578\u3002 fig = px . scatter ( df_data , x = \"sepal_width\" , y = \"sepal_length\" ) HTML ( fig . to_html ()) \u5982\u679c\u60f3\u8981\u66f4\u6e05\u695a\u8868\u9054\u6bcf\u500b\u8cc7\u6599\u9ede\u6240\u5c0d\u61c9\u7684\u985e\u5225\uff0c\u53ef\u4ee5\u518d\u52a0\u4e0a color \u4e26\u6307\u5b9a\u7a2e\u985e\u7684\u6b04\u4f4d\u5373\u6703\u5c07\u6240\u6709\u8cc7\u6599\u81ea\u52d5\u5206\u6210\u4e09\u985e\u3002\u6b64\u5916\u6211\u5011\u4e5f\u80fd\u5920\u8a2d\u5b9a\u6ed1\u9f20\u79fb\u5230\u8cc7\u6599\u9ede\u4e0a\u6240\u986f\u793a\u7684\u8cc7\u8a0a\uff0c\u900f\u904e hover_data \u4e26\u7d66\u4e88\u6307\u5b9a\u6b04\u4f4d\u5373\u53ef\u770b\u5230\u8f38\u51fa\u3002 fig = px . scatter ( df_data , x = \"sepal_width\" , y = \"sepal_length\" , color = \"species\" , hover_data = [ 'petal_length' , 'petal_width' ]) HTML ( fig . 
to_html ()) Plotly Express \u4e5f\u63d0\u4f9b\u4e09\u7dad\u7684\u8996\u89ba\u5316\uff0c\u6b64\u5916\u4f7f\u7528\u8005\u4e5f\u80fd\u5920\u904e\u63a7\u5236\u8b8a\u7248\u81ea\u7531\u7684\u653e\u5927\u8207\u7e2e\u5c0f\u751a\u81f3\u65cb\u8f49\u3002\u4e0b\u5716\u7bc4\u4f8b\u4e2d\u6211\u5011\u5c07 x \u8ef8\u8a2d\u5b9a\u82b1\u843c\u5bec\u5ea6\uff0cy \u8ef8\u8a2d\u5b9a\u82b1\u843c\u9577\u5ea6\uff0cz \u8ef8\u8a2d\u5b9a\u82b1\u74e3\u5bec\u5ea6\u3002\u6b64\u5916 size \u53ef\u4ee5\u63a7\u5236\u6bcf\u4e00\u500b\u8cc7\u6599\u9ede\u7684\u5927\u5c0f\uff0c\u9019\u88e1\u63a1\u7528\u82b1\u74e3\u7684\u9577\u5ea6\u505a\u70ba\u6bcf\u500b\u8cc7\u6599\u9ede\u5927\u5c0f\u7684\u4f9d\u64da\u3002\u56e0\u6b64\u5f9e\u9019\u500b\u7acb\u9ad4\u7a7a\u9593\u53ef\u4ee5\u767c\u73fe\u5f9e\u82b1\u74e3\u9577\u5ea6\u5c0d\u65bc\u82b1\u7684\u7a2e\u985e\u6709\u5f88\u5f37\u7684\u95dc\u806f\u6027\u3002 fig = px . scatter_3d ( df_data , x = \"sepal_width\" , y = \"sepal_length\" , z = \"petal_width\" , color = \"species\" , size = 'petal_length' ) HTML ( fig . to_html ()) \u7bb1\u5f62\u5716 \u6211\u5011\u53ef\u4ee5\u900f\u904e\u7bb1\u5f62\u5716\u9032\u884c\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u6cc1\u5206\u6790\u3002\u5f9e\u8996\u89ba\u5316\u5716\u4e2d\u53ef\u4ee5\u6e05\u695a\u5730\u77e5\u9053\u82b1\u843c\u7684\u5bec\u5ea6\u7bc4\u570d\u4ecb\u65bc 2~4.5 \u4e4b\u9593\uff0c\u4ee5\u53ca\u56db\u5206\u4f4d\u6578\u548c\u96e2\u7fa4\u503c\u7684\u8a0a\u606f\u3002 fig = px . box ( df_data , y = \"sepal_width\" ) HTML ( fig . to_html ()) \u9664\u6b64\u4e4b\u5916\u6211\u5011\u80fd\u5920\u66f4\u8fd1\u4e00\u6b65\u7684\u5206\u6790\u82b1\u843c\u5bec\u5ea6\u5c0d\u65bc\u6bcf\u500b\u54c1\u7a2e\u7684\u5206\u5e03\u72c0\u6cc1\u3002 fig = px . box ( df_data , x = \"species\" , y = \"sepal_width\" , color = \"species\" ) HTML ( fig . 
to_html ()) \u8907\u5408\u578b\u8996\u89ba\u5316\u6280\u5de7 \u8907\u5408\u578b\u8996\u89ba\u5316\u65b9\u5f0f\u53ef\u4ee5\u540c\u6642\u9810\u89bd\u5169\u500b\u8b8a\u6578\u9593\u7684\u6563\u4f48\u5716\u8207\u7bb1\u578b\u5716\u95dc\u4fc2\u3002\u4e3b\u8981\u662f\u900f\u904e marginal_y \u8207 marginal_x \u8a2d\u7f6e\u6a6b\u8ef8\u8207\u7e31\u8ef8\u7684\u8996\u89ba\u5316\u65b9\u5f0f\uff0c\u56e0\u6b64\u5728\u4e00\u500b\u5716\u8868\u4e2d\u53ef\u4ee5\u7d50\u5408\u5169\u7a2e\u8996\u89ba\u5316\u3002\u6b64\u5916 trendline \u53ef\u4ee5\u70ba\u6563\u4f48\u5716\u7e6a\u88fd\u8da8\u52e2\u7dda\uff0c\u8a2d\u7f6e ols \u6703\u63a1\u7528\u6700\u5c0f\u5e73\u65b9\u6cd5\u4f4d\u6578\u64da\u5efa\u7acb\u4e00\u500b\u7dda\u6027\u8ff4\u6b78\u3002 fig = px . scatter ( df_data , x = \"sepal_width\" , y = \"sepal_length\" , color = \"species\" , marginal_y = \"box\" , marginal_x = \"box\" , trendline = \"ols\" , template = \"simple_white\" ) HTML ( fig . to_html ()) \u4e5f\u53ef\u4ee5\u8a66\u8457\u5c07\u6a6b\u8ef8\u8207\u7e31\u8ef8\u6539\u6210\u76f4\u65b9\u5716\u3002 fig = px . scatter ( df_data , x = \"sepal_width\" , y = \"sepal_length\" , color = \"species\" , marginal_y = \"histogram\" , marginal_x = \"histogram\" , trendline = \"ols\" , template = \"simple_white\" ) HTML ( fig . to_html ()) \u532f\u51fa\u5716\u7247 \u65b9\u6cd5\u4e00 \u76f4\u63a5\u9ede\u9078\u63a7\u5236\u9762\u677f\u7684\u76f8\u6a5f\u5716\u793a (Download plot as a png) \u53ef\u4ee5\u7acb\u5373\u4e0b\u8f09\u5716\u7247\u3002 \u65b9\u6cd5\u4e8c \u9996\u5148\u8981\u5b89\u88dd kaleido \u624d\u80fd\u532f\u51fa Plotly Express \u7684\u975c\u614b\u5716\u7247\u3002 !pip install kaleido \u532f\u51fa\u975c\u614b\u5716\u7247 fig . write_image ( \"./demo.png\" ) \u532f\u51fa\u7db2\u9801\u683c\u5f0f\uff0c\u4fdd\u7559\u4e92\u52d5\u5f62\u5f0f fig . 
write_html ( \"./demo.html\" ) Reference Plotly Express API Doc Plotly Express GitHub \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 22] Python \u8996\u89ba\u5316\u89e3\u91cb\u6578\u64da - Plotly Express"},{"location":"22.Plotly-Express/#day-22-python-plotly-express","text":"","title":"[Day 22] Python \u8996\u89ba\u5316\u89e3\u91cb\u6578\u64da - Plotly Express"},{"location":"22.Plotly-Express/#_1","text":"\u5b89\u88dd plotly \u624b\u628a\u624b\u5be6\u4f5c\u8996\u89ba\u5316\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6 \u76f4\u65b9\u5716 \u7279\u5fb5\u95dc\u806f\u5ea6\u5206\u6790 \u6563\u4f48\u5716 \u7bb1\u5f62\u5716 \u8907\u5408\u578b\u8996\u89ba\u5316\u6280\u5de7 \u532f\u51fa\u5716\u7247 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"22.Plotly-Express/#_2","text":"Plotly Express \u662f\u4e00\u500b\u9ad8\u7cbe\u7dfb\u7684\u8cc7\u6599\u8996\u89ba\u5316\u5957\u4ef6\u3002\u521d\u5b78\u6a5f\u5668\u5b78\u7fd2\u7684\u4f60\u4e00\u5b9a\u78b0\u904e\u50cf\u662f matplotlib \u548c seaborn \u9019\u985e\u578b\u7684\u5716\u8868\u5316\u5957\u4ef6\uff0c\u4e0d\u904e\u4f7f\u7528\u904e Plotly Express \u6703\u8b93\u4f60\u5c0d\u65bc\u8cc7\u6599\u8996\u89ba\u5316\u6709\u66f4\u4e0d\u4e00\u6a23\u7684\u9ad4\u9a57\u3002\u5b83\u7684\u529f\u80fd\u4f7f\u7528\u8d77\u4f86\u975e\u5e38\u76f4\u89c0\uff0c\u4e26\u4e14\u53ef\u4ee5\u5f88\u597d\u5730\u8207 Pandas DataFrame \u914d\u5408\u4f7f\u7528\u3002 Plotly Express \u65bc 2019 \u5e74\u7531\u52a0\u62ff\u5927 Plotly \u9019\u9593\u516c\u53f8\u91cb\u51fa\u4e86\u7b2c\u4e00\u7248\u9ad8\u968e\u7684 Python \u8cc7\u6599\u8996\u89ba\u5316\u5957\u4ef6\u3002","title":"\u524d\u8a00"},{"location":"22.Plotly-Express/#plotly","text":"\u82e5\u5c1a\u672a\u5b89\u88dd\u6b64\u5957\u4ef6\u7684\u8b80\u8005\uff0c\u53ef\u4ee5\u958b\u555f\u7d42\u7aef\u6a5f\u8f38\u5165\u4ee5\u4e0b\u6307\u4ee4\u9032\u884c\u5b89\u88dd\uff1a pip install 
plotly","title":"\u5b89\u88dd plotly"},{"location":"22.Plotly-Express/#1","text":"\u5728\u4eca\u5929\u7684\u7bc4\u4f8b\u4e2d\u6211\u5011\u4e00\u6a23\u63a1\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u4f86\u505a\u793a\u7bc4\uff0c\u8b93\u5927\u5bb6\u77a7\u77a7 Plotly Express \u662f\u5982\u512a\u96c5\u7684\u8655\u7406\u8cc7\u6599\u8996\u89ba\u5316\u3002 import plotly.express as px import plotly.graph_objects as go from IPython.display import HTML df_data = px . data . iris () df_data","title":"1) \u8f09\u5165\u8cc7\u6599\u96c6"},{"location":"22.Plotly-Express/#_3","text":"\u70ba\u4e86\u66f4\u6e05\u695a\u4e86\u89e3\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u6cc1\uff0c\u6211\u5011\u53ef\u4ee5\u63a1\u7528\u76f4\u65b9\u5716 histogram \u505a\u66f4\u8fd1\u4e00\u6b65\u7684\u5206\u6790\u3002\u5f9e\u76f4\u65b9\u5716\u6211\u5011\u53ef\u4ee5\u66f4\u6e05\u695a\u77e5\u9053\u7279\u5fb5\u7684\u6bcf\u500b\u503c\u7684\u983b\u7387\u5206\u4f48\u3002\u7531\u65bc\u76ee\u524d\u7248\u672c\u5728 Notebook \u7121\u6cd5\u76f4\u63a5\u4f7f\u7528 fig.show() \u986f\u793a\u4e92\u52d5\u5716\uff0c\u5fc5\u9808\u5b89\u88dd\u4e00\u4e9b\u5c0f\u63d2\u4ef6\u6a21\u7d44\u8207\u8a2d\u5b9a\u3002\u56e0\u6b64\u7bc4\u4f8b\u4e2d\u63a1\u7528\u6700\u7c21\u55ae\u65b9\u6cd5\uff0c\u5148\u8f49\u63db\u6210 HTML code \u4e26\u900f\u904e IPython.display \u4e2d\u7684 HTML \u65b9\u6cd5\u986f\u793a\u51fa\u4f86\u3002 fig = px . histogram ( df_data , x = \"sepal_width\" ) HTML ( fig . to_html ()) \u9664\u6b64\u4e4b\u5916\u6211\u5011\u4e5f\u80fd\u89c0\u5bdf\u6bcf\u4e00\u500b\u7368\u7acb\u7279\u5fb5\u5c0d\u65bc\u82b1\u7684\u54c1\u7a2e\u7684\u6bcf\u500b\u5206\u5e03\u72c0\u6cc1\u3002\u6bcf\u500b\u4e0d\u540c\u7684\u984f\u8272\u4ee3\u8868\u4e0d\u540c\u7684\u82b1\u6735\u54c1\u7a2e\uff0c\u6211\u5011\u53ef\u4ee5\u85c9\u7531\u53c3\u6578\u8a2d\u5b9a\u6bcf\u500b\u76f4\u65b9\u5716\u662f\u5426\u91cd\u758a\uff0c\u4ee5\u53ca\u91cd\u758a\u7684\u900f\u660e\u7a0b\u5ea6\u3002 fig = px . 
histogram ( df_data , x = \"sepal_width\" , color = \"species\" ) fig . update_layout ( barmode = 'overlay' ) fig . update_traces ( opacity = 0.75 ) HTML ( fig . to_html ()) \u63a5\u4e0b\u4f86\u4e00\u6a23\u900f\u904e\u76f4\u65b9\u5716\u65b9\u5f0f\u4f86\u89c0\u5bdf\u6bcf\u500b\u82b1\u6735\u54c1\u7a2e\u7684\u6578\u91cf\u3002\u5f9e\u8996\u89ba\u5316\u53ef\u4ee5\u5f88\u6e05\u695a\u5f97\u77e5\u8a72\u8cc7\u6599\u96c6\u662f\u5426\u662f\u4e00\u500b\u5e73\u7a69\u7684\u8cc7\u6599\u96c6\u3002 fig = px . histogram ( df_data , x = 'species' , y = 'sepal_width' , histfunc = 'count' , height = 300 , title = 'Histogram Chart' ) HTML ( fig . to_html ())","title":"\u76f4\u65b9\u5716"},{"location":"22.Plotly-Express/#_4","text":"\u6211\u5011\u53ef\u4ee5\u63a1\u7528 scatter_matrix \u70ba\u6bcf\u4e00\u500b\u7279\u5fb5\u5f7c\u6b64\u9593\u505a\u4e00\u500b\u95dc\u806f\u5ea6\u5206\u6790\u3002\u900f\u904e\u9019\u7a2e\u8996\u89ba\u5316\u65b9\u5f0f\u6211\u5011\u53ef\u4ee5\u5f88\u6e05\u695a\u7684\u77e5\u9053\u5169\u500b\u7279\u5fb5\u9593\u662f\u5426\u6b63\u76f8\u95dc\u8207\u8ca0\u76f8\u95dc\u3002 fig = px . scatter_matrix ( df_data , dimensions = [ \"sepal_width\" , \"sepal_length\" , \"petal_width\" , \"petal_length\" ], color = \"species\" ) HTML ( fig . to_html ())","title":"\u7279\u5fb5\u95dc\u806f\u5ea6\u5206\u6790"},{"location":"22.Plotly-Express/#_5","text":"\u6211\u5011\u4f7f\u7528\u6563\u4f48\u5716\u5c07\u82b1\u843c\u7684\u9577\u5ea6\u8207\u5bec\u5ea6\u986f\u793a\u5728\u4e8c\u7dad\u5750\u6a19\u5e73\u9762\u4e0a\u3002\u4f7f\u7528 Plotly Express \u5957\u4ef6\u4e2d\u7684 scatter \u65b9\u6cd5\uff0c\u6211\u5011\u53ef\u4ee5\u8f15\u9b06\u69cb\u5efa\u5716\u5f62\uff0c\u4e26\u653e\u5165 DataFrame \u683c\u5f0f\u7684\u8cc7\u6599\u4e26\u6307\u5b9a\u5fc5\u8981\u53c3\u6578 x \u8ef8\u4e2d\u7684\u8b8a\u6578\u548c y \u8ef8\u4e2d\u7684\u8b8a\u6578\u3002 fig = px . scatter ( df_data , x = \"sepal_width\" , y = \"sepal_length\" ) HTML ( fig . 
to_html ()) \u5982\u679c\u60f3\u8981\u66f4\u6e05\u695a\u8868\u9054\u6bcf\u500b\u8cc7\u6599\u9ede\u6240\u5c0d\u61c9\u7684\u985e\u5225\uff0c\u53ef\u4ee5\u518d\u52a0\u4e0a color \u4e26\u6307\u5b9a\u7a2e\u985e\u7684\u6b04\u4f4d\u5373\u6703\u5c07\u6240\u6709\u8cc7\u6599\u81ea\u52d5\u5206\u6210\u4e09\u985e\u3002\u6b64\u5916\u6211\u5011\u4e5f\u80fd\u5920\u8a2d\u5b9a\u6ed1\u9f20\u79fb\u5230\u8cc7\u6599\u9ede\u4e0a\u6240\u986f\u793a\u7684\u8cc7\u8a0a\uff0c\u900f\u904e hover_data \u4e26\u7d66\u4e88\u6307\u5b9a\u6b04\u4f4d\u5373\u53ef\u770b\u5230\u8f38\u51fa\u3002 fig = px . scatter ( df_data , x = \"sepal_width\" , y = \"sepal_length\" , color = \"species\" , hover_data = [ 'petal_length' , 'petal_width' ]) HTML ( fig . to_html ()) Plotly Express \u4e5f\u63d0\u4f9b\u4e09\u7dad\u7684\u8996\u89ba\u5316\uff0c\u6b64\u5916\u4f7f\u7528\u8005\u4e5f\u80fd\u5920\u904e\u63a7\u5236\u8b8a\u7248\u81ea\u7531\u7684\u653e\u5927\u8207\u7e2e\u5c0f\u751a\u81f3\u65cb\u8f49\u3002\u4e0b\u5716\u7bc4\u4f8b\u4e2d\u6211\u5011\u5c07 x \u8ef8\u8a2d\u5b9a\u82b1\u843c\u5bec\u5ea6\uff0cy \u8ef8\u8a2d\u5b9a\u82b1\u843c\u9577\u5ea6\uff0cz \u8ef8\u8a2d\u5b9a\u82b1\u74e3\u5bec\u5ea6\u3002\u6b64\u5916 size \u53ef\u4ee5\u63a7\u5236\u6bcf\u4e00\u500b\u8cc7\u6599\u9ede\u7684\u5927\u5c0f\uff0c\u9019\u88e1\u63a1\u7528\u82b1\u74e3\u7684\u9577\u5ea6\u505a\u70ba\u6bcf\u500b\u8cc7\u6599\u9ede\u5927\u5c0f\u7684\u4f9d\u64da\u3002\u56e0\u6b64\u5f9e\u9019\u500b\u7acb\u9ad4\u7a7a\u9593\u53ef\u4ee5\u767c\u73fe\u5f9e\u82b1\u74e3\u9577\u5ea6\u5c0d\u65bc\u82b1\u7684\u7a2e\u985e\u6709\u5f88\u5f37\u7684\u95dc\u806f\u6027\u3002 fig = px . scatter_3d ( df_data , x = \"sepal_width\" , y = \"sepal_length\" , z = \"petal_width\" , color = \"species\" , size = 'petal_length' ) HTML ( fig . 
to_html ())","title":"\u6563\u4f48\u5716"},{"location":"22.Plotly-Express/#_6","text":"\u6211\u5011\u53ef\u4ee5\u900f\u904e\u7bb1\u5f62\u5716\u9032\u884c\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u6cc1\u5206\u6790\u3002\u5f9e\u8996\u89ba\u5316\u5716\u4e2d\u53ef\u4ee5\u6e05\u695a\u5730\u77e5\u9053\u82b1\u843c\u7684\u5bec\u5ea6\u7bc4\u570d\u4ecb\u65bc 2~4.5 \u4e4b\u9593\uff0c\u4ee5\u53ca\u56db\u5206\u4f4d\u6578\u548c\u96e2\u7fa4\u503c\u7684\u8a0a\u606f\u3002 fig = px . box ( df_data , y = \"sepal_width\" ) HTML ( fig . to_html ()) \u9664\u6b64\u4e4b\u5916\u6211\u5011\u80fd\u5920\u66f4\u8fd1\u4e00\u6b65\u7684\u5206\u6790\u82b1\u843c\u5bec\u5ea6\u5c0d\u65bc\u6bcf\u500b\u54c1\u7a2e\u7684\u5206\u5e03\u72c0\u6cc1\u3002 fig = px . box ( df_data , x = \"species\" , y = \"sepal_width\" , color = \"species\" ) HTML ( fig . to_html ())","title":"\u7bb1\u5f62\u5716"},{"location":"22.Plotly-Express/#_7","text":"\u8907\u5408\u578b\u8996\u89ba\u5316\u65b9\u5f0f\u53ef\u4ee5\u540c\u6642\u9810\u89bd\u5169\u500b\u8b8a\u6578\u9593\u7684\u6563\u4f48\u5716\u8207\u7bb1\u578b\u5716\u95dc\u4fc2\u3002\u4e3b\u8981\u662f\u900f\u904e marginal_y \u8207 marginal_x \u8a2d\u7f6e\u6a6b\u8ef8\u8207\u7e31\u8ef8\u7684\u8996\u89ba\u5316\u65b9\u5f0f\uff0c\u56e0\u6b64\u5728\u4e00\u500b\u5716\u8868\u4e2d\u53ef\u4ee5\u7d50\u5408\u5169\u7a2e\u8996\u89ba\u5316\u3002\u6b64\u5916 trendline \u53ef\u4ee5\u70ba\u6563\u4f48\u5716\u7e6a\u88fd\u8da8\u52e2\u7dda\uff0c\u8a2d\u7f6e ols \u6703\u63a1\u7528\u6700\u5c0f\u5e73\u65b9\u6cd5\u4f4d\u6578\u64da\u5efa\u7acb\u4e00\u500b\u7dda\u6027\u8ff4\u6b78\u3002 fig = px . scatter ( df_data , x = \"sepal_width\" , y = \"sepal_length\" , color = \"species\" , marginal_y = \"box\" , marginal_x = \"box\" , trendline = \"ols\" , template = \"simple_white\" ) HTML ( fig . to_html ()) \u4e5f\u53ef\u4ee5\u8a66\u8457\u5c07\u6a6b\u8ef8\u8207\u7e31\u8ef8\u6539\u6210\u76f4\u65b9\u5716\u3002 fig = px . 
scatter ( df_data , x = \"sepal_width\" , y = \"sepal_length\" , color = \"species\" , marginal_y = \"histogram\" , marginal_x = \"histogram\" , trendline = \"ols\" , template = \"simple_white\" ) HTML ( fig . to_html ())","title":"\u8907\u5408\u578b\u8996\u89ba\u5316\u6280\u5de7"},{"location":"22.Plotly-Express/#_8","text":"","title":"\u532f\u51fa\u5716\u7247"},{"location":"22.Plotly-Express/#_9","text":"\u76f4\u63a5\u9ede\u9078\u63a7\u5236\u9762\u677f\u7684\u76f8\u6a5f\u5716\u793a (Download plot as a png) \u53ef\u4ee5\u7acb\u5373\u4e0b\u8f09\u5716\u7247\u3002","title":"\u65b9\u6cd5\u4e00"},{"location":"22.Plotly-Express/#_10","text":"\u9996\u5148\u8981\u5b89\u88dd kaleido \u624d\u80fd\u532f\u51fa Plotly Express \u7684\u975c\u614b\u5716\u7247\u3002 !pip install kaleido \u532f\u51fa\u975c\u614b\u5716\u7247 fig . write_image ( \"./demo.png\" ) \u532f\u51fa\u7db2\u9801\u683c\u5f0f\uff0c\u4fdd\u7559\u4e92\u52d5\u5f62\u5f0f fig . write_html ( \"./demo.html\" )","title":"\u65b9\u6cd5\u4e8c"},{"location":"22.Plotly-Express/#reference","text":"Plotly Express API Doc Plotly Express GitHub \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/","text":"[Day 23] \u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406 \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u8cc7\u6599\u7279\u5fb5\u89c0\u5bdf\u8207\u96e2\u7fa4\u503c\u5206\u6790 \u6aa2\u8996\u8cc7\u6599\u7684\u5206\u5e03\u72c0\u614b \u504f\u5ea6 (Skewness) \u5cf0\u5ea6 (Kurtosis) \u4fee\u6b63\u7279\u5fb5\u504f\u5ea6\u7684\u65b9\u6cd5 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 \u8cc7\u6599\u524d\u8655\u7406 (Data 
Preprocessing)\uff0c\u662f\u6a5f\u5668\u5b78\u7fd2\u4e2d\u6700\u91cd\u8981\u7684\u4e00\u90e8\u5206\u3002\u4eca\u65e5\u7684\u5167\u5bb9\u53ef\u5206\u70ba\u5169\u90e8\u4efd\uff0c\u524d\u534a\u90e8\u4efd\u7b97\u662f\u4e00\u4e9b\u5c0d\u8cc7\u6599\u7684\u89c0\u5bdf\u8207\u5206\u6790\uff0c\u5f8c\u534a\u90e8\u4e3b\u8981\u662f\u91dd\u5c0d\u7279\u5fb5 x \u9032\u884c\u7d71\u8a08\u65b9\u6cd5\u7684\u8cc7\u6599\u5206\u5e03\u89c0\u5bdf\u4ee5\u53ca\u5982\u4f55\u4fee\u6b63\u8cc7\u6599\u55ae\u5cf0\u504f\u5de6\u548c\u504f\u53f3\u7684\u5e38\u898b\u65b9\u6cd5\u3002 \u8f09\u5165\u8cc7\u6599 \u5728\u4eca\u65e5\u7684\u7bc4\u4f8b\u4e2d\u6211\u5011\u63a1\u7528\u6ce2\u58eb\u9813\u623f\u50f9\u9810\u6e2c\u7684\u8cc7\u6599\u96c6\u3002\u6b64\u8cc7\u6599\u96c6\u5171\u6709 506 \u7b46\u8cc7\u6599\u3002\u5176\u4e2d\u6211\u5011\u6311\u9078\u5169\u500b\u7279\u5fb5\u4f86\u9032\u884c\u793a\u7bc4\uff0c\u5206\u5225\u6709 LSTAT: \u5340\u57df\u4e2d\u88ab\u8a8d\u70ba\u662f\u4f4e\u6536\u5165\u968e\u5c64\u7684\u6bd4\u4f8b\u3001AGE: 1940\u5e74\u4e4b\u524d\u5efa\u6210\u7684\u81ea\u7528\u623f\u5c4b\u6bd4\u4f8b\u3002 import numpy as np import pandas as pd import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_boston # \u8f09\u5165\u8cc7\u6599\u96c6 boston_dataset = load_boston () # \u5c07\u8cc7\u6599\u8f49\u63db\u6210pd.DataFrame\u683c\u5f0f\u3002\u76ee\u6a19\u8f38\u51fa\u662fMEDIV\uff0c\u5269\u4e0b\u7684\u5c31\u662f\u7279\u5fb5\u5373\u70ba\u8f38\u5165\u7279\u5fb5\u3002 boston = pd . DataFrame ( boston_dataset . data , columns = boston_dataset . feature_names ) boston [ 'MEDV' ] = boston_dataset . target boston \u6211\u5011\u53ef\u4ee5\u900f\u904e Pandas \u7684 describe() \u65b9\u6cd5\u5148\u4f86\u67e5\u770b\u6bcf\u500b\u7279\u5fb5\u7684\u5e73\u5747\u6578\u3001\u6a19\u6e96\u5dee\u3001\u56db\u5206\u4f4d\u6578\u4ee5\u53ca\u6700\u5927\u503c\u8207\u6700\u5c0f\u503c\u3002 # \u67e5\u770b\u8cc7\u6599\u5206\u5e03\u72c0\u6cc1 boston . 
describe () \u96e2\u7fa4\u503c\u5206\u6790 \u4ee5 LSTAT \u7279\u5fb5\u8209\u4f8b\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e boxplot \u4f86\u67e5\u770b\u8a72\u7279\u5fb5\u5728 506 \u7b46\u8cc7\u6599\u4e2d\u7684\u5206\u5e03\u72c0\u6cc1\uff0c\u6211\u5011\u53ef\u4ee5\u770b\u51fa\u5e73\u5747\u503c\u7d04 12\uff0c\u6700\u5927\u503c\u63a5\u8fd1 38\uff0c\u6700\u5c0f\u503c\u63a5\u8fd1 2\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u5927\u65bc 32 \u4ee5\u5916\u6709\u591a\u500b\u96f6\u6563\u7684\u6578\u64da\u9ede\uff0c\u9019\u4e9b\u8cc7\u6599\u6211\u5011\u53ef\u4ee5\u4f86\u5206\u6790\u662f\u5426\u70ba\u7570\u5e38\u9ede\u3002\u56e0\u70ba\u9019\u4e9b\u7570\u5e38\u9ede\u6240\u9020\u6210\u7684\u96e2\u7fa4\u503c\u53ef\u80fd\u6703\u9020\u6210\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u6cc1\u56b4\u91cd\u7684\u504f\u79fb\u3002 plt . figure ( figsize = ( 2 , 5 )) plt . boxplot ( boston [ 'LSTAT' ], showmeans = True ) plt . title ( 'LSTAT' ) plt . show () \u504f\u5ea6 & \u5cf0\u5ea6 \u504f\u5ea6 (Skewness) \u504f\u5ea6 (Skewness) \u662f\u7528\u4f86\u8861\u91cf\u8cc7\u6599\u5206\u5e03\u7684\u578b\u614b\uff0c\u540c\u6642\u4e5f\u8aaa\u660e\u8cc7\u6599\u5206\u914d\u4e0d\u5c0d\u7a31\u7684\u7a0b\u5ea6\u3002\u5176\u5224\u5225\u65b9\u5f0f\u5982\u4e0b\uff1a \u53f3\u504f(\u6b63\u504f)\uff0c\u8868\u793a\u6709\u5c11\u6578\u5e7e\u7b46\u8cc7\u6599\u5f88\u5927\uff0c\u6545\u5e73\u5747\u6578>\u4e2d\u4f4d\u6578\uff0c\u6240\u4ee5\u504f\u5ea6>0\u3002 \u504f\u5ea6=0 \u8868\u793a\u8cc7\u6599\u5206\u5e03\u5c0d\u7a31\uff0c\u5448\u9418\u5f62\u5e38\u614b\u5206\u5e03\u3002 \u5de6\u504f(\u8ca0\u504f)\uff0c\u8868\u793a\u6709\u5c11\u6578\u5e7e\u7b46\u8cc7\u6599\u5f88\u5c0f\uff0c\u6545\u5e73\u5747\u6578<\u4e2d\u4f4d\u6578\uff0c\u6240\u4ee5\u504f\u5ea6<0\u3002 \u5cf0\u5ea6 (Kurtosis) \u5cf0\u5ea6 (Kurtossis) 
\u53ef\u4ee5\u53cd\u6620\u8cc7\u6599\u7684\u5206\u5e03\u5f62\u72c0\u3002\u4f8b\u5982\u8a72\u8cc7\u6599\u662f\u5426\u6bd4\u8f03\u9ad8\u8073\u6216\u662f\u6241\u5e73\u7684\u5f62\u72c0\u3002\u5176\u5224\u5225\u65b9\u5f0f\u5982\u4e0b\uff1a \u5cf0\u5ea6>0 \u8868\u793a\u8cc7\u6599\u5448\u73fe\u9ad8\u5cfd\u5cf0\u3002 \u5cf0\u5ea6=0 \u8868\u793a\u8cc7\u6599\u5448\u73fe\u5e38\u614b\u5cf0\u3002 \u5cf0\u5ea6<0 \u8868\u793a\u8cc7\u6599\u5448\u73fe\u4f4e\u6f64\u5cf0\u3002 \u5206\u5e03\u72c0\u614b LSTAT \u7279\u5fb5\u89c0\u5bdf \u6211\u5011\u53ef\u4ee5\u767c\u73fe LSTAT \u7279\u5fb5\u5448\u73fe\u53f3\u504f\u3002\u900f\u904e Pandas \u8a08\u7b97\u8a72\u7279\u5fb5\u7684\u504f\u5ea6\u8207\u5cf0\u5ea6\u3002\u7531\u7d50\u679c\u53ef\u4ee5\u5f97\u77e5\u504f\u5ea6 0.91>0 \u5448\u53f3\u504f\uff0c\u800c\u5cf0\u5ea6 0.49>0 \u5448\u73fe\u9ad8\u5cfd\u5cf0\u5f62\u72c0\u3002 # \u4f7f\u7528\u7684\u8cc7\u6599\u662f LSTAT: \u5340\u57df\u4e2d\u88ab\u8a8d\u70ba\u662f\u4f4e\u6536\u5165\u968e\u5c64\u7684\u6bd4\u4f8b # skewness \u8207 kurtosis skewness = round ( boston [ 'LSTAT' ] . skew (), 2 ) kurtosis = round ( boston [ 'LSTAT' ] . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( boston [ 'LSTAT' ], kde = True ) plt . show () AGE \u7279\u5fb5\u89c0\u5bdf \u6211\u5011\u53ef\u4ee5\u767c\u73fe AGE \u7279\u5fb5\u5448\u73fe\u5de6\u504f\u3002\u900f\u904e Pandas \u8a08\u7b97\u8a72\u7279\u5fb5\u7684\u504f\u5ea6\u8207\u5cf0\u5ea6\u3002\u7531\u7d50\u679c\u53ef\u4ee5\u5f97\u77e5\u504f\u5ea6 -0.6<0 \u5448\u5de6\u504f\uff0c\u800c\u5cf0\u5ea6 -0.97<0 \u5448\u73fe\u4f4e\u6f64\u5cf0\u5f62\u72c0\u3002 # \u4f7f\u7528\u7684\u8cc7\u6599\u662f AGE: 1940\u5e74\u4e4b\u524d\u5efa\u6210\u7684\u81ea\u7528\u623f\u5c4b\u6bd4\u4f8b #skewness \u8207 kurtosis skewness = round ( boston [ 'AGE' ] . skew (), 2 ) kurtosis = round ( boston [ 'AGE' ] . 
kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( boston [ 'AGE' ], kde = True ) plt . show () \u4fee\u6b63\u8cc7\u6599\u504f\u614b\u7684\u65b9\u6cd5 \u5728\u6578\u5b78\u7d71\u8a08\u6216\u662f\u6a5f\u5668\u5b78\u7fd2\u4e2d\u6211\u5011\u90fd\u6703\u63d0\u51fa\u5047\u8a2d\uff0c\u524d\u63d0\u662f\u8cc7\u6599\u6a23\u672c\u662f\u5177\u6709\u5e38\u614b\u5206\u4f48\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e\u525b\u525b\u6240\u8b1b\u7684\u504f\u5ea6\u8207\u5cf0\u5ea6\u4f86\u8a55\u4f30\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u614b\uff0c\u6216\u662f\u900f\u904e\u76f4\u65b9\u5716\u8207\u6838\u5bc6\u5ea6\u4f30\u8a08\u8996\u89ba\u5316\u67e5\u770b\u8cc7\u6599\u5206\u5e03\u3002\u7576\u8cc7\u6599\u5448\u73fe\u55ae\u5cf0\u504f\u659c\u6642\uff0c\u6211\u5011\u6703\u900f\u904e\u4e00\u4e9b\u8cc7\u6599\u8f49\u63db\u6280\u5de7\uff0c\u8b93\u6240\u6709\u8cc7\u6599\u80fd\u5920\u4fee\u6b63\u56de\u5e38\u614b\u5206\u4f48\u3002\u4ee5\u4e0b\u6574\u5e7e\u5e7e\u500b\u5e38\u898b\u7684\u4fee\u6b63\u7279\u5fb5\u504f\u5ea6\u7684\u65b9\u6cd5\uff1a \u5c0d\u6578\u8f49\u63db (\u8cc7\u6599\u4e0d\u80fd\u67090\u6216\u8ca0\u6578) \u5e73\u65b9\u6839\u8f49\u63db (\u8cc7\u6599\u4e0d\u80fd\u662f\u8ca0\u6578) \u7acb\u65b9\u6839\u8f49\u63db \u6b21\u65b9\u8f49\u63db (\u53ea\u80fd\u8655\u7406\u5de6\u504f) Box-Cox \u8f49\u63db \u79fb\u9664\u96e2\u7fa4\u503c \u5c0d\u6578\u8f49\u63db \u56e0\u70ba\u8cc7\u6599\u578b\u614b\u5de6\u504f\uff0c\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u900f\u904e\u53d6\u5c0d\u6578\u4f86\u5c07\u8cc7\u6599\u62c9\u56de\u4f7f\u70ba\u66f4\u96c6\u4e2d\u3002 transform_data = np . log ( boston [ 'LSTAT' ]) # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( transform_data , kde = True ) plt . 
show () \u5e73\u65b9\u6839\u8f49\u63db transform_data = boston [ 'LSTAT' ] ** ( 1 / 2 ) # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( transform_data , kde = True ) plt . show () \u7acb\u65b9\u6839\u8f49\u63db transform_data = boston [ 'LSTAT' ] ** ( 1 / 3 ) # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( transform_data , kde = True ) plt . show () \u6b21\u65b9\u8f49\u63db \u6b21\u65b9\u8f49\u63db\u50c5\u80fd\u4f7f\u7528\u5728\u504f\u5de6\u7684\u8cc7\u6599\u4e0a\u3002 transform_data = np . power ( boston [ 'AGE' ], 2 ) # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( transform_data , kde = True ) plt . show () \u539f\u672c\u7684\u8cc7\u6599\u5206\u5e03\u4f4e\u6f64\u5cf0\u4e14\u6709\u9ede\u96d9\u5cf0\u7684\u8da8\u52e2\uff0c\u56e0\u6b64\u8f49\u63db\u51fa\u4f86\u6703\u6709\u5169\u5ea7\u5c71\u7684\u611f\u89ba\u3002 Box-Cox \u8f49\u63db from scipy.stats import boxcox transform_data , lam = boxcox ( boston [ 'LSTAT' ]) transform_data = pd . DataFrame ( transform_data , columns = [ 'LSTAT' ])[ 'LSTAT' ] # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( transform_data , kde = True ) plt . 
show () \u79fb\u9664\u96e2\u7fa4\u503c \u5728 Q3\uff0b1.5IQR\uff08\u56db\u5206\u4f4d\u8ddd\uff09\u548c Q1-1.5IQR \u8655\u756b\u5169\u689d\u8207\u4e2d\u4f4d\u7dda\u4e00\u6a23\u7684\u7dda\u6bb5\uff0c\u9019\u5169\u689d\u7dda\u6bb5\u70ba\u7570\u5e38\u503c\u622a\u65b7\u9ede\uff0c\u7a31\u5176\u70ba\u5167\u9650\u3002\u5728 Q3\uff0b3IQR \u548cQ1\uff0d3IQR \u8655\u756b\u5169\u689d\u7dda\u6bb5\u7a31\u5176\u70ba\u5916\u9650\u3002\u8655\u65bc\u5167\u9650\u4ee5\u5916\u4f4d\u7f6e\u7684\u9ede\u8868\u793a\u7684\u6578\u64da\u90fd\u662f\u7570\u5e38\u503c\uff0c\u5176\u4e2d\u5728\u5167\u9650\u8207\u5916\u9650\u4e4b\u9593\u7684\u7570\u5e38\u503c\u70ba\u6eab\u548c\u7684\u7570\u5e38\u503c\uff08mild outliers\uff09\uff0c\u5728\u5916\u9650\u4ee5\u5916\u7684\u70ba\u6975\u7aef\u7684\u7570\u5e38\u503c (extreme outliers)\u3002 # \u5c07\u6240\u6709\u7279\u5fb5\u8d85\u51fa1.5\u500dIQR\u7684\u6982\u5ff5\u5c07\u9019\u4e9bOutlier\u5148\u53bb\u6389\uff0c\u907f\u514d\u5c0dModel\u9020\u6210\u5f71\u97ff\u3002 print ( \"Shape Of The Before Ouliers: \" , boston [ 'LSTAT' ] . shape ) n = 1.5 #IQR = Q3-Q1 IQR = np . percentile ( boston [ 'LSTAT' ], 75 ) - np . percentile ( boston [ 'LSTAT' ], 25 ) # outlier = Q3 + n*IQR transform_data = boston [ boston [ 'LSTAT' ] < np . percentile ( boston [ 'LSTAT' ], 75 ) + n * IQR ] # outlier = Q1 - n*IQR transform_data = transform_data [ transform_data [ 'LSTAT' ] > np . percentile ( transform_data [ 'LSTAT' ], 25 ) - n * IQR ][ 'LSTAT' ] print ( \"Shape Of The After Ouliers: \" , transform_data . 
shape ) \u6211\u5011\u5fc5\u9808\u5c07\u8d85\u51fa 1.5 \u500d\u7684\u6975\u7aef\u7570\u5e38\u503c\u6e05\u6389\u3002\u5171\u6709 7 \u7b46\u8cc7\u6599\u88ab\u79fb\u9664\u6389\u3002 \u8f38\u51fa\u7d50\u679c\uff1a Shape Of The Before Ouliers: (506,) Shape Of The After Ouliers: (499,) \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 23] \u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#day-23","text":"","title":"[Day 23] \u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_1","text":"\u8cc7\u6599\u7279\u5fb5\u89c0\u5bdf\u8207\u96e2\u7fa4\u503c\u5206\u6790 \u6aa2\u8996\u8cc7\u6599\u7684\u5206\u5e03\u72c0\u614b \u504f\u5ea6 (Skewness) \u5cf0\u5ea6 (Kurtosis) \u4fee\u6b63\u7279\u5fb5\u504f\u5ea6\u7684\u65b9\u6cd5 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_2","text":"\u8cc7\u6599\u524d\u8655\u7406 (Data Preprocessing)\uff0c\u662f\u6a5f\u5668\u5b78\u7fd2\u4e2d\u6700\u91cd\u8981\u7684\u4e00\u90e8\u5206\u3002\u4eca\u65e5\u7684\u5167\u5bb9\u53ef\u5206\u70ba\u5169\u90e8\u4efd\uff0c\u524d\u534a\u90e8\u4efd\u7b97\u662f\u4e00\u4e9b\u5c0d\u8cc7\u6599\u7684\u89c0\u5bdf\u8207\u5206\u6790\uff0c\u5f8c\u534a\u90e8\u4e3b\u8981\u662f\u91dd\u5c0d\u7279\u5fb5 x 
\u9032\u884c\u7d71\u8a08\u65b9\u6cd5\u7684\u8cc7\u6599\u5206\u5e03\u89c0\u5bdf\u4ee5\u53ca\u5982\u4f55\u4fee\u6b63\u8cc7\u6599\u55ae\u5cf0\u504f\u5de6\u548c\u504f\u53f3\u7684\u5e38\u898b\u65b9\u6cd5\u3002","title":"\u524d\u8a00"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_3","text":"\u5728\u4eca\u65e5\u7684\u7bc4\u4f8b\u4e2d\u6211\u5011\u63a1\u7528\u6ce2\u58eb\u9813\u623f\u50f9\u9810\u6e2c\u7684\u8cc7\u6599\u96c6\u3002\u6b64\u8cc7\u6599\u96c6\u5171\u6709 506 \u7b46\u8cc7\u6599\u3002\u5176\u4e2d\u6211\u5011\u6311\u9078\u5169\u500b\u7279\u5fb5\u4f86\u9032\u884c\u793a\u7bc4\uff0c\u5206\u5225\u6709 LSTAT: \u5340\u57df\u4e2d\u88ab\u8a8d\u70ba\u662f\u4f4e\u6536\u5165\u968e\u5c64\u7684\u6bd4\u4f8b\u3001AGE: 1940\u5e74\u4e4b\u524d\u5efa\u6210\u7684\u81ea\u7528\u623f\u5c4b\u6bd4\u4f8b\u3002 import numpy as np import pandas as pd import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_boston # \u8f09\u5165\u8cc7\u6599\u96c6 boston_dataset = load_boston () # \u5c07\u8cc7\u6599\u8f49\u63db\u6210pd.DataFrame\u683c\u5f0f\u3002\u76ee\u6a19\u8f38\u51fa\u662fMEDIV\uff0c\u5269\u4e0b\u7684\u5c31\u662f\u7279\u5fb5\u5373\u70ba\u8f38\u5165\u7279\u5fb5\u3002 boston = pd . DataFrame ( boston_dataset . data , columns = boston_dataset . feature_names ) boston [ 'MEDV' ] = boston_dataset . target boston \u6211\u5011\u53ef\u4ee5\u900f\u904e Pandas \u7684 describe() \u65b9\u6cd5\u5148\u4f86\u67e5\u770b\u6bcf\u500b\u7279\u5fb5\u7684\u5e73\u5747\u6578\u3001\u6a19\u6e96\u5dee\u3001\u56db\u5206\u4f4d\u6578\u4ee5\u53ca\u6700\u5927\u503c\u8207\u6700\u5c0f\u503c\u3002 # \u67e5\u770b\u8cc7\u6599\u5206\u5e03\u72c0\u6cc1 boston . 
describe ()","title":"\u8f09\u5165\u8cc7\u6599"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_4","text":"\u4ee5 LSTAT \u7279\u5fb5\u8209\u4f8b\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e boxplot \u4f86\u67e5\u770b\u8a72\u7279\u5fb5\u5728 506 \u7b46\u8cc7\u6599\u4e2d\u7684\u5206\u5e03\u72c0\u6cc1\uff0c\u6211\u5011\u53ef\u4ee5\u770b\u51fa\u5e73\u5747\u503c\u7d04 12\uff0c\u6700\u5927\u503c\u63a5\u8fd1 38\uff0c\u6700\u5c0f\u503c\u63a5\u8fd1 2\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u5927\u65bc 32 \u4ee5\u5916\u6709\u591a\u500b\u96f6\u6563\u7684\u6578\u64da\u9ede\uff0c\u9019\u4e9b\u8cc7\u6599\u6211\u5011\u53ef\u4ee5\u4f86\u5206\u6790\u662f\u5426\u70ba\u7570\u5e38\u9ede\u3002\u56e0\u70ba\u9019\u4e9b\u7570\u5e38\u9ede\u6240\u9020\u6210\u7684\u96e2\u7fa4\u503c\u53ef\u80fd\u6703\u9020\u6210\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u6cc1\u56b4\u91cd\u7684\u504f\u79fb\u3002 plt . figure ( figsize = ( 2 , 5 )) plt . boxplot ( boston [ 'LSTAT' ], showmeans = True ) plt . title ( 'LSTAT' ) plt . 
show ()","title":"\u96e2\u7fa4\u503c\u5206\u6790"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_5","text":"","title":"\u504f\u5ea6 & \u5cf0\u5ea6"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#skewness","text":"\u504f\u5ea6 (Skewness) \u662f\u7528\u4f86\u8861\u91cf\u8cc7\u6599\u5206\u5e03\u7684\u578b\u614b\uff0c\u540c\u6642\u4e5f\u8aaa\u660e\u8cc7\u6599\u5206\u914d\u4e0d\u5c0d\u7a31\u7684\u7a0b\u5ea6\u3002\u5176\u5224\u5225\u65b9\u5f0f\u5982\u4e0b\uff1a \u53f3\u504f(\u6b63\u504f)\uff0c\u8868\u793a\u6709\u5c11\u6578\u5e7e\u7b46\u8cc7\u6599\u5f88\u5927\uff0c\u6545\u5e73\u5747\u6578>\u4e2d\u4f4d\u6578\uff0c\u6240\u4ee5\u504f\u5ea6>0\u3002 \u504f\u5ea6=0 \u8868\u793a\u8cc7\u6599\u5206\u5e03\u5c0d\u7a31\uff0c\u5448\u9418\u5f62\u5e38\u614b\u5206\u5e03\u3002 \u5de6\u504f(\u8ca0\u504f)\uff0c\u8868\u793a\u6709\u5c11\u6578\u5e7e\u7b46\u8cc7\u6599\u5f88\u5c0f\uff0c\u6545\u5e73\u5747\u6578<\u4e2d\u4f4d\u6578\uff0c\u6240\u4ee5\u504f\u5ea6<0\u3002","title":"\u504f\u5ea6 (Skewness)"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#kurtosis","text":"\u5cf0\u5ea6 (Kurtossis) \u53ef\u4ee5\u53cd\u6620\u8cc7\u6599\u7684\u5206\u5e03\u5f62\u72c0\u3002\u4f8b\u5982\u8a72\u8cc7\u6599\u662f\u5426\u6bd4\u8f03\u9ad8\u8073\u6216\u662f\u6241\u5e73\u7684\u5f62\u72c0\u3002\u5176\u5224\u5225\u65b9\u5f0f\u5982\u4e0b\uff1a \u5cf0\u5ea6>0 \u8868\u793a\u8cc7\u6599\u5448\u73fe\u9ad8\u5cfd\u5cf0\u3002 \u5cf0\u5ea6=0 \u8868\u793a\u8cc7\u6599\u5448\u73fe\u5e38\u614b\u5cf0\u3002 \u5cf0\u5ea6<0 \u8868\u793a\u8cc7\u6599\u5448\u73fe\u4f4e\u6f64\u5cf0\u3002","title":"\u5cf0\u5ea6 (Kurtosis)"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_6","text":"","title":"\u5206\u5e03\u72c0\u614b"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#lstat","text":"\u6211\u5011\u53ef\u4ee5\u767c\u73fe LSTAT \u7279\u5fb5\u5448\u73fe\u53f3\u504f\u3002\u900f\u904e Pandas 
\u8a08\u7b97\u8a72\u7279\u5fb5\u7684\u504f\u5ea6\u8207\u5cf0\u5ea6\u3002\u7531\u7d50\u679c\u53ef\u4ee5\u5f97\u77e5\u504f\u5ea6 0.91>0 \u5448\u53f3\u504f\uff0c\u800c\u5cf0\u5ea6 0.49>0 \u5448\u73fe\u9ad8\u5cfd\u5cf0\u5f62\u72c0\u3002 # \u4f7f\u7528\u7684\u8cc7\u6599\u662f LSTAT: \u5340\u57df\u4e2d\u88ab\u8a8d\u70ba\u662f\u4f4e\u6536\u5165\u968e\u5c64\u7684\u6bd4\u4f8b # skewness \u8207 kurtosis skewness = round ( boston [ 'LSTAT' ] . skew (), 2 ) kurtosis = round ( boston [ 'LSTAT' ] . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( boston [ 'LSTAT' ], kde = True ) plt . show ()","title":"LSTAT \u7279\u5fb5\u89c0\u5bdf"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#age","text":"\u6211\u5011\u53ef\u4ee5\u767c\u73fe AGE \u7279\u5fb5\u5448\u73fe\u5de6\u504f\u3002\u900f\u904e Pandas \u8a08\u7b97\u8a72\u7279\u5fb5\u7684\u504f\u5ea6\u8207\u5cf0\u5ea6\u3002\u7531\u7d50\u679c\u53ef\u4ee5\u5f97\u77e5\u504f\u5ea6 -0.6<0 \u5448\u5de6\u504f\uff0c\u800c\u5cf0\u5ea6 -0.97<0 \u5448\u73fe\u4f4e\u6f64\u5cf0\u5f62\u72c0\u3002 # \u4f7f\u7528\u7684\u8cc7\u6599\u662f AGE: 1940\u5e74\u4e4b\u524d\u5efa\u6210\u7684\u81ea\u7528\u623f\u5c4b\u6bd4\u4f8b #skewness \u8207 kurtosis skewness = round ( boston [ 'AGE' ] . skew (), 2 ) kurtosis = round ( boston [ 'AGE' ] . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( boston [ 'AGE' ], kde = True ) plt . 
show ()","title":"AGE \u7279\u5fb5\u89c0\u5bdf"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_7","text":"\u5728\u6578\u5b78\u7d71\u8a08\u6216\u662f\u6a5f\u5668\u5b78\u7fd2\u4e2d\u6211\u5011\u90fd\u6703\u63d0\u51fa\u5047\u8a2d\uff0c\u524d\u63d0\u662f\u8cc7\u6599\u6a23\u672c\u662f\u5177\u6709\u5e38\u614b\u5206\u4f48\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e\u525b\u525b\u6240\u8b1b\u7684\u504f\u5ea6\u8207\u5cf0\u5ea6\u4f86\u8a55\u4f30\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u614b\uff0c\u6216\u662f\u900f\u904e\u76f4\u65b9\u5716\u8207\u6838\u5bc6\u5ea6\u4f30\u8a08\u8996\u89ba\u5316\u67e5\u770b\u8cc7\u6599\u5206\u5e03\u3002\u7576\u8cc7\u6599\u5448\u73fe\u55ae\u5cf0\u504f\u659c\u6642\uff0c\u6211\u5011\u6703\u900f\u904e\u4e00\u4e9b\u8cc7\u6599\u8f49\u63db\u6280\u5de7\uff0c\u8b93\u6240\u6709\u8cc7\u6599\u80fd\u5920\u4fee\u6b63\u56de\u5e38\u614b\u5206\u4f48\u3002\u4ee5\u4e0b\u6574\u5e7e\u5e7e\u500b\u5e38\u898b\u7684\u4fee\u6b63\u7279\u5fb5\u504f\u5ea6\u7684\u65b9\u6cd5\uff1a \u5c0d\u6578\u8f49\u63db (\u8cc7\u6599\u4e0d\u80fd\u67090\u6216\u8ca0\u6578) \u5e73\u65b9\u6839\u8f49\u63db (\u8cc7\u6599\u4e0d\u80fd\u662f\u8ca0\u6578) \u7acb\u65b9\u6839\u8f49\u63db \u6b21\u65b9\u8f49\u63db (\u53ea\u80fd\u8655\u7406\u5de6\u504f) Box-Cox \u8f49\u63db \u79fb\u9664\u96e2\u7fa4\u503c","title":"\u4fee\u6b63\u8cc7\u6599\u504f\u614b\u7684\u65b9\u6cd5"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_8","text":"\u56e0\u70ba\u8cc7\u6599\u578b\u614b\u5de6\u504f\uff0c\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u900f\u904e\u53d6\u5c0d\u6578\u4f86\u5c07\u8cc7\u6599\u62c9\u56de\u4f7f\u70ba\u66f4\u96c6\u4e2d\u3002 transform_data = np . log ( boston [ 'LSTAT' ]) # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . 
histplot ( transform_data , kde = True ) plt . show ()","title":"\u5c0d\u6578\u8f49\u63db"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_9","text":"transform_data = boston [ 'LSTAT' ] ** ( 1 / 2 ) # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( transform_data , kde = True ) plt . show ()","title":"\u5e73\u65b9\u6839\u8f49\u63db"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_10","text":"transform_data = boston [ 'LSTAT' ] ** ( 1 / 3 ) # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( transform_data , kde = True ) plt . show ()","title":"\u7acb\u65b9\u6839\u8f49\u63db"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_11","text":"\u6b21\u65b9\u8f49\u63db\u50c5\u80fd\u4f7f\u7528\u5728\u504f\u5de6\u7684\u8cc7\u6599\u4e0a\u3002 transform_data = np . power ( boston [ 'AGE' ], 2 ) # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( transform_data , kde = True ) plt . 
show () \u539f\u672c\u7684\u8cc7\u6599\u5206\u5e03\u4f4e\u6f64\u5cf0\u4e14\u6709\u9ede\u96d9\u5cf0\u7684\u8da8\u52e2\uff0c\u56e0\u6b64\u8f49\u63db\u51fa\u4f86\u6703\u6709\u5169\u5ea7\u5c71\u7684\u611f\u89ba\u3002","title":"\u6b21\u65b9\u8f49\u63db"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#box-cox","text":"from scipy.stats import boxcox transform_data , lam = boxcox ( boston [ 'LSTAT' ]) transform_data = pd . DataFrame ( transform_data , columns = [ 'LSTAT' ])[ 'LSTAT' ] # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( transform_data , kde = True ) plt . show ()","title":"Box-Cox \u8f49\u63db"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_12","text":"\u5728 Q3\uff0b1.5IQR\uff08\u56db\u5206\u4f4d\u8ddd\uff09\u548c Q1-1.5IQR \u8655\u756b\u5169\u689d\u8207\u4e2d\u4f4d\u7dda\u4e00\u6a23\u7684\u7dda\u6bb5\uff0c\u9019\u5169\u689d\u7dda\u6bb5\u70ba\u7570\u5e38\u503c\u622a\u65b7\u9ede\uff0c\u7a31\u5176\u70ba\u5167\u9650\u3002\u5728 Q3\uff0b3IQR \u548cQ1\uff0d3IQR \u8655\u756b\u5169\u689d\u7dda\u6bb5\u7a31\u5176\u70ba\u5916\u9650\u3002\u8655\u65bc\u5167\u9650\u4ee5\u5916\u4f4d\u7f6e\u7684\u9ede\u8868\u793a\u7684\u6578\u64da\u90fd\u662f\u7570\u5e38\u503c\uff0c\u5176\u4e2d\u5728\u5167\u9650\u8207\u5916\u9650\u4e4b\u9593\u7684\u7570\u5e38\u503c\u70ba\u6eab\u548c\u7684\u7570\u5e38\u503c\uff08mild outliers\uff09\uff0c\u5728\u5916\u9650\u4ee5\u5916\u7684\u70ba\u6975\u7aef\u7684\u7570\u5e38\u503c (extreme outliers)\u3002 # \u5c07\u6240\u6709\u7279\u5fb5\u8d85\u51fa1.5\u500dIQR\u7684\u6982\u5ff5\u5c07\u9019\u4e9bOutlier\u5148\u53bb\u6389\uff0c\u907f\u514d\u5c0dModel\u9020\u6210\u5f71\u97ff\u3002 print ( \"Shape Of The Before Ouliers: \" , boston [ 'LSTAT' ] . 
shape ) n = 1.5 #IQR = Q3-Q1 IQR = np . percentile ( boston [ 'LSTAT' ], 75 ) - np . percentile ( boston [ 'LSTAT' ], 25 ) # outlier = Q3 + n*IQR transform_data = boston [ boston [ 'LSTAT' ] < np . percentile ( boston [ 'LSTAT' ], 75 ) + n * IQR ] # outlier = Q1 - n*IQR transform_data = transform_data [ transform_data [ 'LSTAT' ] > np . percentile ( transform_data [ 'LSTAT' ], 25 ) - n * IQR ][ 'LSTAT' ] print ( \"Shape Of The After Ouliers: \" , transform_data . shape ) \u6211\u5011\u5fc5\u9808\u5c07\u8d85\u51fa 1.5 \u500d\u7684\u6975\u7aef\u7570\u5e38\u503c\u6e05\u6389\u3002\u5171\u6709 7 \u7b46\u8cc7\u6599\u88ab\u79fb\u9664\u6389\u3002 \u8f38\u51fa\u7d50\u679c\uff1a Shape Of The Before Ouliers: (506,) Shape Of The After Ouliers: (499,) \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u79fb\u9664\u96e2\u7fa4\u503c"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/","text":"[Day 24] \u6a5f\u5668\u5b78\u7fd2 - \u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408 \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u5982\u4f55\u9078\u64c7\u6700\u4f73\u7684\u6a21\u578b\uff1f \u6df1\u5165\u7406\u89e3\u5ea6\u64ec\u5408\u8207\u6b20\u64ec\u5408 Bias-Variance Tradeoff \u5982\u4f55\u907f\u514d\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408\uff1f \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 
\u5728\u6a5f\u5668\u5b78\u7fd2\u4e2d\u5927\u5bb6\u53ef\u80fd\u6700\u5e38\u898b\u7684\u554f\u984c\u662f\uff0c\u7576\u8a13\u7df4\u597d\u4e86\u6a21\u578b\u4e26\u5728\u6e2c\u8a66\u8cc7\u6599\u4e5f\u7372\u5f97\u4e0d\u932f\u7684\u6210\u7e3e\u3002\u65bc\u662f\u5f88\u958b\u5fc3\u7684\u843d\u5730\u4e26\u90e8\u7f72\u5230\u771f\u5be6\u5834\u57df\u4e2d\uff0c\u6b8a\u4e0d\u77e5\u9810\u6e2c\u51fa\u4f86\u7684\u7d50\u679c\u9060\u9060\u4e0d\u5982\u9810\u671f\u3002\u6211\u60f3\u9019\u500b\u75db\u9ede\u5927\u5bb6\u61c9\u8a72\u90fd\u7d93\u6b77\u904e\uff0c\u5c24\u5176\u662f\u6a5f\u5668\u5b78\u7fd2\u7684\u65b0\u624b\u3002\u9019\u7a2e\u60c5\u6cc1\u5c31\u662f\u6240\u8b02\u7684\u904e\u5ea6\u64ec\u5408\uff0c\u5b83\u662f\u4e00\u500b\u5728\u6a5f\u5668\u5b78\u7fd2\u9818\u57df\u4e2d\u975e\u5e38\u68d8\u624b\u7684\u7684\u554f\u984c\u3002\u7576\u4f60\u7684\u6a21\u578b\u904e\u5ea6\u7684\u64ec\u5408\u8a13\u7df4\u96c6\uff0c\u9019\u610f\u5473\u8457\u4f60\u7684\u6a21\u578b\u904e\u65bc\u8907\u96dc\u7684\u53bb\u8a18\u4f4f\u6240\u6709\u73fe\u6709\u7684\u6578\u64da\u9ede\uff0c\u9032\u800c\u5c0e\u81f4\u6a21\u578b\u7684\u6cdb\u5316\u80fd\u529b\u4e0d\u4f73\uff0c\u9019\u4e0d\u662f\u6211\u5011\u671f\u671b\u7684\u3002\u6240\u8b02\u7684\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u662f\u6307\uff0c\u7576\u6211\u5229\u7528\u8a13\u7df4\u96c6\u8a13\u7df4\u4e00\u500b\u6a21\u578b\u5f8c\u518d\u62ff\u53e6\u4e00\u7d44\u6a21\u578b\u6c92\u770b\u904e\u7684\u8cc7\u6599\u9032\u884c\u9810\u6e2c\uff0c\u6700\u7d42\u7684\u9810\u6e2c\u7d50\u679c\u5982\u679c\u5728\u6c92\u770b\u904e\u7684\u8cc7\u6599\u4e2d\u4f9d\u7136\u4fdd\u6301\u4e0d\u932f\u7684\u8868\u73fe\u6211\u5011\u5c31\u53ef\u4ee5\u8aaa\u6b64\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u5f37\u3002\u4eca\u5929\u6211\u5011\u5c07\u4f86\u8a73\u7d30\u63a2\u8a0e\u4f55\u8b02\u904e\u5ea6\u64ec\u5408\uff0c\u4ee5\u53ca\u8a72\u5982\u4f55\u53bb\u89e3\u6c7a\u5b83\u4f7f\u5f97\u6a21\u578b\u8655\u65bc\u4e00\u500b\u9069\u7576\u7684\u72c0\u614b\u3002 \u5982\u4f55\u9078\u64c7\u6700\u4f73\u7684\u6a21\u578b\uff1f 
\u901a\u5e38\u6211\u5011\u5e0c\u671b\u9810\u6e2c\u51fa\u4f86\u7684\u7d50\u679c\u8981\u8207\u5be6\u969b\u7684\u6578\u503c\u8d8a\u63a5\u8fd1\u8d8a\u597d\uff0c\u4e5f\u5c31\u662f\u5728\u6a21\u578b\u8a13\u7df4\u7684\u904e\u7a0b\u4e2d\u6211\u5011\u8981\u60f3\u8fa6\u6cd5\u6700\u5c0f\u5316\u8aa4\u5dee\u4f7f\u5f97\u6a21\u578b\u7684\u8aa4\u5dee\u8d8a\u5c0f\u8d8a\u597d\u3002\u90a3\u9ebc\u6211\u5011\u8a72\u5982\u4f55\u8a55\u4f30\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u597d\u58de\u5462\uff1f\u4ee5\u4e0b\u5716\u70ba\u4f8b\uff0c\u5047\u8a2d\u6211\u5011\u8981\u8a13\u7df4\u4e00\u500b\u4e8c\u5143\u5206\u985e\u5668\u3002\u6700\u7c21\u55ae\u7684\u65b9\u6cd5\u662f\u627e\u51fa\u4e00\u689d\u7dda\u5920\u5c07\u9019\u5169\u500b\u985e\u5225\u5b8c\u6574\u5730\u5206\u958b\uff0c\u7136\u800c\u9019\u4e00\u689d\u5207\u5272\u7684\u7dda\u8981\u9577\u5f97\u600e\u6a23\u624d\u662f\u597d\u7684\u6a21\u578b\u5462\uff1f\u5f9e\u4e0b\u5716\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7d05\u8272\u865b\u7dda\u7684\u6a21\u578b\u5b8c\u6574\u7684\u64ec\u5408\u65bc\u8a13\u7df4\u8cc7\u6599\uff0c\u800c\u7d05\u8272\u5be6\u7dda\u7684\u6a21\u578b\u76f8\u5c0d\u7684\u6bd4\u8f03\u6c92\u6709\u90a3\u9ebc\u56b4\u53b2\uff0c\u5728\u5169\u500b\u985e\u5225\u9593\u9069\u7576\u7684\u627e\u51fa\u4e00\u689d\u5e73\u6ed1\u7684\u66f2\u7dda\u4f86\u5340\u9694\u5169\u985e\u7684\u8cc7\u6599\u3002 
\u63a5\u8457\u6211\u5011\u62ff\u6e2c\u8a66\u8cc7\u6599\u9032\u884c\u6a21\u578b\u9810\u6e2c\uff0c\u53ef\u4ee5\u767c\u73fe\u7531\u65bc\u7d05\u8272\u865b\u7dda\u7684\u6a21\u578b\u5df2\u7d93\u5b8c\u6574\u8a18\u4f4f\u4e86\u8a13\u7df4\u96c6\u7684\u8da8\u52e2\uff0c\u56e0\u6b64\u5728\u65b0\u7684\u6c92\u770b\u904e\u7684\u8cc7\u6599\u8868\u73fe\u5c31\u6c92\u6709\u90a3\u9ebc\u597d\u4e86\u3002\u5c24\u5176\u662f\u5728\u5169\u985e\u5225\u5206\u9694\u7dda\u9644\u8fd1\u7684\u8cc7\u6599\u6700\u80fd\u770b\u51fa\u7aef\u502a\u3002\u65bc\u662f\u6211\u5011\u53ef\u4ee5\u5f88\u78ba\u5b9a\u7d05\u8272\u865b\u7dda\u7684\u6a21\u578b\u5df2\u7d93\u904e\u5ea6\u64ec\u5408\u8a13\u7df4\u8cc7\u6599\u4e86\u3002\u53e6\u5916\u7d05\u8272\u5be6\u73fe\u7684\u6a21\u578b\u96d6\u7136\u5728\u8a13\u7df4\u96c6\u4e2d\u6709\u5e7e\u7b46\u6703\u9810\u6e2c\u932f\u8aa4\uff0c\u4f46\u662f\u5b83\u518d\u6e2c\u8a66\u96c6\u8cc7\u6599\u4e2d\u4e00\u6a23\u4fdd\u6301\u7a69\u5b9a\u7684\u9810\u6e2c\u80fd\u529b\u3002 \u5f9e\u4e0a\u8ff0\u7684\u4f8b\u5b50\u6211\u5011\u53ef\u4ee5\u5f97\u77e5\uff0c\u5728\u8a13\u7df4\u6a21\u578b\u6642\u4e26\u975e\u8a13\u7df4\u96c6\u7684\u8aa4\u5dee\u8d8a\u5c0f\u8d8a\u597d\u3002\u6211\u5011\u5fc5\u9808\u540c\u6642\u62ff\u6e2c\u8a66\u96c6\u9a57\u8b49\u6a21\u578b\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u76ee\u6a19\u662f\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u5e73\u5747\u8aa4\u5dee\u8981\u8d8a\u8fd1\u8d8a\u597d\u3002 \u4e00\u500b\u9069\u7576\u7684\u6a5f\u5668\u5b78\u7fd2\u5de5\u4f5c\u6d41\u7a0b\u5305\u62ec\uff1a - \u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6 - \u8cc7\u6599\u8996\u89ba\u5316\u8207\u524d\u8655\u7406 - \u5c0b\u627e\u9069\u5408\u7684\u6a21\u578b - \u8abf\u6574\u6a21\u578b\u8d85\u53c3\u6578 - \u4f7f\u7528\u9069\u7576\u7684\u6307\u6a19\u8a55\u4f30\u6a21\u578b - \u4ea4\u53c9\u9a57\u8b49\u6a21\u578b Overfitting vs. 
Underfitting \u904e\u5ea6\u64ec\u5408\u7684\u53cd\u7fa9\u5c31\u662f\u6b20\u64ec\u5408\uff0c\u5f9e\u5b57\u9762\u4e0a\u53ef\u4ee5\u5f97\u77e5\u6a21\u578b\u9810\u6e2c\u80fd\u529b\u662f\u4e0d\u597d\u7684\u3002\u7576\u6a21\u578b\u592a\u7c21\u55ae\u6642\u6703\u767c\u751f\u6b20\u64ec\u5408\uff0c\u6216\u662f\u52a0\u5165\u592a\u591a\u7684 L1/L2 \u6b63\u5247\u5316\u9650\u5236\u6a21\u578b\u9810\u6e2c\u80fd\u529b\uff0c\u4f7f\u6a21\u578b\u5728\u5f9e\u6578\u64da\u96c6\u4e2d\u5b78\u7fd2\u6642\u8b8a\u5f97\u4e0d\u9748\u6d3b\u3002\u4e00\u500b\u904e\u65bc\u7c21\u55ae\u7684\u6a21\u578b\u5728\u9810\u6e2c\u4e2d\u5f80\u5f80\u5177\u6709\u8f03\u5c0f\u7684\u65b9\u5dee(variance)\u800c\u5c0e\u81f4\u504f\u5dee(bias)\u5c31\u6703\u8b8a\u5927\u3002\u76f8\u53cd\u7684\u904e\u65bc\u8907\u96dc\u7684\u6a21\u578b\u6703\u6709\u8f03\u7684\u8b8a\u7570\u9032\u800c\u5c0e\u81f4\u65b9\u5dee\u5927\uff0c\u540c\u6642\u504f\u5dee\u6703\u8b8a\u5c0f\u3002\u504f\u5dee\u548c\u65b9\u5dee\u90fd\u662f\u6a5f\u5668\u5b78\u7fd2\u4e2d\u7684\u9810\u6e2c\u8aa4\u5dee\u7684\u65b9\u5f0f\u3002\u5728\u4e00\u822c\u60c5\u6cc1\u4e0b\u6211\u5011\u53ef\u4ee5\u6e1b\u5c11\u504f\u5dee\u6240\u5f15\u8d77\u7684\u8aa4\u5dee\uff0c\u4f46\u53ef\u80fd\u6703\u5c0e\u81f4\u589e\u52a0\u65b9\u5dee\u5f15\u8d77\u7684\u8aa4\u5dee\uff0c\u53cd\u4e4b\u4ea6\u7136\u3002 \u9019\u88e1\u6211\u5011\u5c31\u8981\u4f86\u601d\u8003\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u4e2d\u7684 error \u5f9e\u4f55\u800c\u4f86\uff1f\u6a21\u578b\u4e2d\u7684 error \u662f\u5224\u65b7\u4e00\u500b\u6a21\u578b\u7684\u597d\u58de\u4f9d\u64da\uff0c\u4f46\u5176\u5be6\u6211\u5011\u53ef\u4ee5\u5c07 error \u62c6\u5206\u6210\u5169\u5927\u90e8\u5206\u3002\u5206\u5225\u6709 Bias \u8207 Variance \u5169\u500b\u90e8\u5206\u3002\u4ee5\u5be6\u969b\u4f8b\u5b50\u4f86\u8aaa\uff0c\u5047\u8a2d\u8f38\u51fa y \u662f\u8f38\u5165 x \u771f\u6b63\u7684\u7b54\u6848\uff0c\u800c \u0177 \u5247\u662f\u900f\u904e\u6a21\u578b f(x) 
\u8a13\u7df4\u51fa\u4f86\u7684\u9810\u6e2c\u503c\uff0c\u6211\u5011\u5e0c\u671b\u9810\u6e2c\u7684\u7d50\u679c\u8981\u8207\u771f\u5be6\u7b54\u6848\u8d8a\u63a5\u8fd1\u8d8a\u597d\uff0c\u7576 \u0177\u2260y \u6642\u5c31\u6703\u7522\u751f error (\u8aa4\u5dee)\u3002 Bias-Variance Tradeoff \u65b9\u5dee\u8207\u504f\u5dee\u4e4b\u9593\u5b58\u5728\u8457\u4e00\u4e9b\u95dc\u4fc2\uff0c\u6211\u5011\u5fc5\u9808\u5f9e\u4e2d\u627e\u5230\u4e00\u500b\u9069\u7576\u7684\u5e73\u8861\u9ede\u3002\u56e0\u6b64\u6211\u5011\u5e0c\u671b\u900f\u904e\u6b0a\u8861 bias error \u8ddf variance error \u4f86\u4f7f\u5f97\u7e3d\u8aa4\u5dee\u9054\u5230\u6700\u5c0f\u3002\u6211\u5011\u5e38\u6703\u4ee5\u6253\u9776\u4f8b\u5b50\u89e3\u91cb\u65b9\u5dee\u8207\u504f\u5dee\u4e4b\u9593\u7684\u95dc\u806f\u6027\u3002\u5047\u8a2d\u6211\u5011\u767c\u5c04\u5341\u6b21\uff0c\u6211\u5011\u8aac\u4e00\u500b\u4eba\u7684\u6253\u9776\u6280\u8853\u5f88\u7cbe\u6e96\u3002\u5176\u4e2d\u7684 \u7cbe \u5c31\u8868\u793a\u9019\u5341\u500b\u628a\u9762\u4e0a\u7684\u9ede\u5f7c\u6b64\u9593\u8ddd\u96e2\u90fd\u76f8\u7576\u8fd1\uff0c\u4e5f\u5c31\u662f\u6211\u7684\u65b9\u5dee\u975e\u5e38\u4f4e(low variance)\u3002\u53e6\u5916\u6240\u8b02\u7684 \u6e96 \u5c31\u8868\u793a\u9019\u5341\u500b\u9ede\u90fd\u96e2\u6e96\u5fc3\u5f88\u8fd1\uff0c\u4e5f\u5c31\u662f\u6211\u5011\u7684\u504f\u5dee\u975e\u5e38\u4f4e(low bias)\u3002 Underfitting: \u904e\u65bc\u7c21\u55ae\u7684\u6a21\u578b\u4f7f\u5f97\u9810\u6e2c\u7d50\u679c\u5f48\u6027\u4e0d\u9ad8\uff0c\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u8868\u73fe\u90fd\u4e0d\u597d\u3002low variance (high bias)\u3002 Overfitting: \u904e\u65bc\u8907\u96dc\u7684\u6a21\u578b\u4f7f\u5f97\u8a13\u7df4\u96c6\u5b8c\u6574\u7684\u88ab\u64ec\u5408\uff0c\u56e0\u6b64\u8a13\u7df4\u96c6\u8868\u73fe\u6975\u597d\uff0c\u4f46\u6e2c\u8a66\u96c6\u8868\u73fe\u4e0d\u4f73\u3002high variance (low bias)\u3002 Error from Bias 
\u504f\u5dee(bias)\u5c31\u662f\u6a21\u578b\u7684\u9810\u6e2c\u8207\u771f\u5be6\u503c\u4e4b\u9593\u7684\u5dee\u7570\u3002\u4e00\u822c\u6211\u5011\u8a13\u7df4\u6a21\u578b\u662f\u671f\u671b\u9810\u6e2c\u7684\u503c\u8981\u8207\u5be6\u969b\u7684\u7b54\u6848\u8981\u8d8a\u63a5\u8fd1\u8d8a\u597d\u3002\u7136\u800c\u7576\u4e00\u500b\u7c21\u55ae\u7684\u7dda\u6027\u6a21\u578b\u53ef\u80fd\u7121\u6cd5\u5b8c\u6574\u5730\u64ec\u5408\u5230\u4e00\u500b\u8907\u96dc\u975e\u7dda\u6027\u7684\u8cc7\u6599\u96c6\u3002\u56e0\u6b64\u5982\u4e0b\u5716\u6240\u793a\uff0c\u7576\u4e00\u500b\u6a21\u578b\u8a13\u7df4\u7d50\u679c\u504f\u5dee\u904e\u5927\u6211\u5011\u53ef\u4ee5\u5f97\u77e5\u8a72\u6a21\u578b\u904e\u65bc\u7c21\u55ae\u3002\u7121\u8ad6\u641c\u96c6\u518d\u591a\u7684\u8cc7\u6599\uff0c\u7dda\u6027\u7684\u6a21\u578b\u6c38\u9060\u7121\u6cd5\u64ec\u5408\u975e\u7dda\u6027\u7684\u66f2\u7dda\u3002\u56e0\u70ba\u6bd4\u8f03\u7c21\u55ae\u7684\u6a21\u578b\uff0c\u4ed6\u53d7\u5230\u4e0d\u540c\u7684\u8cc7\u6599\u7684\u5f71\u97ff\u662f\u6bd4\u8f03\u5c0f\u7684\u3002 \u7c21\u55ae\u7684\u6a21\u578b\u6709\u5927\u7684 bias\uff0c\u5c0f\u7684 variance\u3002 Error \u4f86\u81ea\u65bc bias \u5f88\u5927\uff0c\u7a31\u70ba\u6b20\u64ec\u5408\u3002 Error from Variance 
\u65b9\u5dee(variance)\u662f\u6307\u4f60\u7684\u6a21\u578b\u5c0d\u65bc\u8cc7\u6599\u96c6\u7684\u654f\u611f\u7a0b\u5ea6\u3002\u4e00\u500b\u904e\u65bc\u8907\u96dc\u7684\u6a21\u578b\u6703\u5c0e\u81f4\u8f38\u51fa\u7684\u8b8a\u7570\u6027\u975e\u5e38\u5927\u3002\u6a21\u578b\u6b7b\u80cc\u6240\u6709\u8a13\u7df4\u96c6\u4e2d\u7684\u6578\u64da\u9ede\u6703\u5c0e\u81f4\u4e00\u500b\u554f\u984c\u767c\u751f\u3002\u7576\u4f60\u7684\u8a13\u7df4\u8cc7\u6599\u6709\u9700\u591a\u7684\u96a8\u6a5f\u8aa4\u5dee\u6216\u662f\u96e2\u7fa4\u503c\u6642\uff0c\u6211\u5011\u53c8\u628a\u9019\u4e9b\u7570\u5e38\u503c\u5168\u90e8\u64ec\u5408\u9032\u6a21\u578b\u88e1\u9762\uff0c\u5c0e\u81f4\u5b78\u51fa\u4f86\u7684\u6a21\u578b\u904e\u65bc\u8907\u96dc\u540c\u6642\u964d\u4f4e\u6cdb\u5316\u80fd\u529b\uff0c\u5c0d\u65bc\u672a\u77e5\u7684\u8cc7\u6599\u9810\u6e2c\u7684\u80fd\u529b\u5c31\u6703\u5f88\u5dee\uff0c\u540c\u6642\u9020\u5c31\u4e86\u5f88\u9ad8\u7684 variance error\u3002\u56e0\u6b64\u9019\u6a23\u7684\u7d50\u679c\u6211\u5011\u7a31\u70ba\u904e\u5ea6\u64ec\u5408\u3002 \u8f03\u8907\u96dc\u7684\u6a21\u578b\u6709\u5c0f\u7684 bias\uff0c\u5927\u7684 variance\u3002 Error \u4f86\u81ea\u65bc variance \u5f88\u5927\uff0c\u7a31\u70ba\u904e\u5ea6\u64ec\u5408\u3002 \u5982\u4f55\u907f\u514d\u6b20\u64ec\u5408\uff1f \u901a\u5e38 bias \u5927\u800c\u5c0e\u81f4\u6a21\u578b\u904e\u65bc\u7c21\u55ae\uff0c\u800c\u7121\u6cd5\u64ec\u5408\u8a13\u7df4\u8cc7\u6599\u3002\u6211\u5011\u53ef\u4ee5\u8a66\u8457\u589e\u52a0\u8f38\u5165\u7684\u7279\u5fb5\uff0c\u4e26\u505a\u4e00\u4e9b\u7279\u5fb5\u5de5\u7a0b\u8b93\u6a21\u578b\u89c0\u5bdf\u591a\u9ede\u7dda\u7d22\u3002\u6216\u662f\u8abf\u6574\u6a21\u578b\u7684\u6f14\u7b97\u6cd5\uff0c\u4f7f\u6a21\u578b\u66f4\u8907\u96dc\u3002\u4f8b\u5982\u4f7f\u7528\u9805\u6b21\u66f4\u9ad8\u7684\u591a\u9805\u5f0f\u6a21\u578b\uff0c\u6216\u662f tree-based 
\u6a21\u578b\u4e2d\u9069\u7576\u7684\u589e\u52a0\u6a39\u7684\u6df1\u5ea6......\u7b49\u3002\u9019\u88cf\u66f4\u503c\u5f97\u4e00\u63d0\u7684\u662f\uff0c\u7576\u6a21\u578b\u6b20\u64ec\u5408\u6642\u641c\u96c6\u518d\u591a\u7684\u8a13\u7df4\u8cc7\u6599\u662f\u6c92\u6709\u7528\u7684\u3002\u56e0\u70ba\u7c21\u55ae\u7684\u6a21\u578b\u6bd4\u8f03\u4e0d\u6703\u53d7\u8cc7\u6599\u7684\u5f71\u97ff\uff0c\u6240\u4ee5 variance \u76f8\u5c0d\u7684\u6703\u6bd4\u8f03\u4f4e\u800c bias \u5927\uff0c\u4e5f\u5c31\u662f\u8f38\u51fa\u7684\u8b8a\u5316\u6027\u4e0d\u5927\u3002\u5f9e\u9019\u88e1\u6211\u5011\u53ef\u4ee5\u5f97\u77e5\u7c21\u55ae\u7684\u6a21\u578b\u53d7\u5230\u4e0d\u540c\u7684\u8f38\u5165\u8cc7\u6599\u53d7\u5230\u7684\u5f71\u97ff\u662f\u6bd4\u8f03\u5c0f\u7684\u3002\u56e0\u70ba\u6a21\u578b\u9078\u5f97\u4e0d\u597d\uff0c\u518d\u600e\u9ebc\u8a13\u7df4\u4ed6\u7684 bias \u9084\u662f\u4e00\u6a23\u5927\u3002 \u589e\u52a0\u8f38\u5165\u7279\u5fb5\u6216\u7279\u5fb5\u5de5\u7a0b \u63d0\u9ad8\u6a21\u578b\u8907\u96dc\u5ea6 \u5982\u4f55\u907f\u514d\u904e\u5ea6\u64ec\u5408\uff1f \u7576\u6a21\u578b\u904e\u65bc\u8907\u96dc\u904e\u5ea6\u64ec\u5408\u767c\u751f\u7684\u6a5f\u7387\u76f8\u5c0d\u63d0\u9ad8\uff0c\u6211\u5011\u53ef\u4ee5\u5f9e\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u89c0\u5bdf\uff0c\u5f88\u5bb9\u6613\u5730\u6aa2\u6e2c\u6a21\u578b\u662f\u5426\u904e\u5ea6\u64ec\u5408\u3002\u4f46\u662f\u6211\u5011\u61c9\u8a72\u5982\u4f55\u907f\u514d\u6a21\u578b\u592a\u904e\u65bc\u8907\u96dc\uff0c\u800c\u5c0e\u81f4\u904e\u5ea6\u64ec\u5408\u767c\u751f\u5462\uff1f\u901a\u5e38\u6211\u5011\u6703\u8a3a\u65b7\u9019\u4e9b\u932f\u8aa4\u7684\u4f86\u6e90\uff0c\u9019\u4e9b\u932f\u8aa4\u4f86\u81ea\u65bc\u5169\u7a2e\uff0c\u5206\u5225\u70ba\u6709 bias \u8207 
variance\u3002\u5982\u679c\u6211\u5011\u80fd\u5920\u8a3a\u65b7\u51fa\u9019\u4e9b\u932f\u8aa4\u7684\u4f86\u6e90\uff0c\u6211\u5011\u5c31\u80fd\u6311\u51fa\u9069\u7576\u7684\u65b9\u6cd5\u4f86\u6539\u5584\u6a21\u578b\u3002\u4ee5\u4e0b\u5e7e\u9ede\u6216\u8a31\u80fd\u5920\u5e6b\u52a9\u4f60\u9032\u884c\u5efa\u6a21\uff1a \u641c\u96c6\u66f4\u591a\u8a13\u7df4\u8cc7\u6599 \u589e\u52a0\u8a0a\u7df4\u96c6\u7684\u8cc7\u6599\u91cf\u662f\u6709\u6548\u63a7\u5236 variance \u7684\u65b9\u6cd5\uff0c\u4e26\u4e14\u4e0d\u6703\u589e\u52a0 bias\u3002 \u6a21\u578b\u6dfb\u52a0 Regularization \u5728\u640d\u5931\u51fd\u6578\u4e2d\u589e\u52a0\u4e00\u4e9b\u9650\u5236\u5f0f\uff0c\u964d\u4f4e\u6a21\u578b\u8907\u96dc\u3002 \u4ea4\u53c9\u9a57\u8b49 \u5f9e\u8a13\u7df4\u96c6\u4e2d\u5207\u51fa\u9a57\u8b49\u96c6\uff0c\u4e26\u6311\u51fa\u597d\u7684\u6a21\u578b\u3002\u800c\u4e0d\u662f\u5f9e\u6e2c\u8a66\u96c6\u4e2d\u6c42\u6700\u5c0f error\u3002 Early Stopping \u8a2d\u5b9a\u7576\u6a21\u578b\u9023\u7e8c\u5e7e\u5e36\u90fd\u7121\u6cd5\u6539\u5584 error\uff0c\u5c31\u7acb\u5373\u7d42\u6b62\u8a13\u7df4\u3002 Ensembling \u900f\u904e\u8a13\u7df4\u591a\u500b\u6a21\u578b\uff0c\u4e26\u53d6\u5f97\u6bcf\u500b\u6a21\u578b\u9810\u6e2c\u4e26\u5e73\u5747\u4f5c\u70ba\u6700\u7d42\u8f38\u51fa\u3002 Reference Overfitting in Machine Learning: What It Is and How to Prevent It WTF is the Bias-Variance Tradeoff? 
(Infographic) \u3010\u6a5f\u5668\u5b78\u7fd2\u3011\u504f\u5dee\u8207\u65b9\u5dee\u4e4b\u6b0a\u8861 Bias-Variance Tradeoff \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 24] \u6a5f\u5668\u5b78\u7fd2 - \u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#day-24-","text":"","title":"[Day 24] \u6a5f\u5668\u5b78\u7fd2 - \u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#_1","text":"\u5982\u4f55\u9078\u64c7\u6700\u4f73\u7684\u6a21\u578b\uff1f \u6df1\u5165\u7406\u89e3\u5ea6\u64ec\u5408\u8207\u6b20\u64ec\u5408 Bias-Variance Tradeoff \u5982\u4f55\u907f\u514d\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408\uff1f \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#_2","text":"\u5728\u6a5f\u5668\u5b78\u7fd2\u4e2d\u5927\u5bb6\u53ef\u80fd\u6700\u5e38\u898b\u7684\u554f\u984c\u662f\uff0c\u7576\u8a13\u7df4\u597d\u4e86\u6a21\u578b\u4e26\u5728\u6e2c\u8a66\u8cc7\u6599\u4e5f\u7372\u5f97\u4e0d\u932f\u7684\u6210\u7e3e\u3002\u65bc\u662f\u5f88\u958b\u5fc3\u7684\u843d\u5730\u4e26\u90e8\u7f72\u5230\u771f\u5be6\u5834\u57df\u4e2d\uff0c\u6b8a\u4e0d\u77e5\u9810\u6e2c\u51fa\u4f86\u7684\u7d50\u679c\u9060\u9060\u4e0d\u5982\u9810\u671f\u3002\u6211\u60f3\u9019\u500b\u75db\u9ede\u5927\u5bb6\u61c9\u8a72\u90fd\u7d93\u6b77\u904e\uff0c\u5c24\u5176\u662f\u6a5f\u5668\u5b78\u7fd2\u7684\u65b0\u624b\u3002\u9019\u7a2e\u60c5\u6cc1\u5c31\u662f\u6240\u8b02\u7684\u904e\u5ea6\u64ec\u5408\uff0c\u5b83\u662f\u4e00\u500b\u5728\u6a5f\u5668\u5b78\u7fd2\u9818\u57df\u4e2d\u975e\u5e38\u68d8\u624b\u7684\u7684\u554f\u984c\u3002\u7576\u4f60\u7684\u6a21\u578b\u904e\u5ea6\u7684\u64ec\u5408\u8a13\u7
df4\u96c6\uff0c\u9019\u610f\u5473\u8457\u4f60\u7684\u6a21\u578b\u904e\u65bc\u8907\u96dc\u7684\u53bb\u8a18\u4f4f\u6240\u6709\u73fe\u6709\u7684\u6578\u64da\u9ede\uff0c\u9032\u800c\u5c0e\u81f4\u6a21\u578b\u7684\u6cdb\u5316\u80fd\u529b\u4e0d\u4f73\uff0c\u9019\u4e0d\u662f\u6211\u5011\u671f\u671b\u7684\u3002\u6240\u8b02\u7684\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u662f\u6307\uff0c\u7576\u6211\u5229\u7528\u8a13\u7df4\u96c6\u8a13\u7df4\u4e00\u500b\u6a21\u578b\u5f8c\u518d\u62ff\u53e6\u4e00\u7d44\u6a21\u578b\u6c92\u770b\u904e\u7684\u8cc7\u6599\u9032\u884c\u9810\u6e2c\uff0c\u6700\u7d42\u7684\u9810\u6e2c\u7d50\u679c\u5982\u679c\u5728\u6c92\u770b\u904e\u7684\u8cc7\u6599\u4e2d\u4f9d\u7136\u4fdd\u6301\u4e0d\u932f\u7684\u8868\u73fe\u6211\u5011\u5c31\u53ef\u4ee5\u8aaa\u6b64\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u5f37\u3002\u4eca\u5929\u6211\u5011\u5c07\u4f86\u8a73\u7d30\u63a2\u8a0e\u4f55\u8b02\u904e\u5ea6\u64ec\u5408\uff0c\u4ee5\u53ca\u8a72\u5982\u4f55\u53bb\u89e3\u6c7a\u5b83\u4f7f\u5f97\u6a21\u578b\u8655\u65bc\u4e00\u500b\u9069\u7576\u7684\u72c0\u614b\u3002","title":"\u524d\u8a00"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#_3","text":"\u901a\u5e38\u6211\u5011\u5e0c\u671b\u9810\u6e2c\u51fa\u4f86\u7684\u7d50\u679c\u8981\u8207\u5be6\u969b\u7684\u6578\u503c\u8d8a\u63a5\u8fd1\u8d8a\u597d\uff0c\u4e5f\u5c31\u662f\u5728\u6a21\u578b\u8a13\u7df4\u7684\u904e\u7a0b\u4e2d\u6211\u5011\u8981\u60f3\u8fa6\u6cd5\u6700\u5c0f\u5316\u8aa4\u5dee\u4f7f\u5f97\u6a21\u578b\u7684\u8aa4\u5dee\u8d8a\u5c0f\u8d8a\u597d\u3002\u90a3\u9ebc\u6211\u5011\u8a72\u5982\u4f55\u8a55\u4f30\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u597d\u58de\u5462\uff1f\u4ee5\u4e0b\u5716\u70ba\u4f8b\uff0c\u5047\u8a2d\u6211\u5011\u8981\u8a13\u7df4\u4e00\u500b\u4e8c\u5143\u5206\u985e\u5668\u3002\u6700\u7c21\u55ae\u7684\u65b9\u6cd5\u662f\u627e\u51fa\u4e00\u689d\u7dda\u5920\u5c07\u9019\u5169\u500b\u985e\u5225\u5b8c\u6574\u5730\u5206\u958b\uff0c\u7136\u800c\u9019\u4e00\u689d\u5207\u5272\u7684\u7dda\u8
981\u9577\u5f97\u600e\u6a23\u624d\u662f\u597d\u7684\u6a21\u578b\u5462\uff1f\u5f9e\u4e0b\u5716\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7d05\u8272\u865b\u7dda\u7684\u6a21\u578b\u5b8c\u6574\u7684\u64ec\u5408\u65bc\u8a13\u7df4\u8cc7\u6599\uff0c\u800c\u7d05\u8272\u5be6\u7dda\u7684\u6a21\u578b\u76f8\u5c0d\u7684\u6bd4\u8f03\u6c92\u6709\u90a3\u9ebc\u56b4\u53b2\uff0c\u5728\u5169\u500b\u985e\u5225\u9593\u9069\u7576\u7684\u627e\u51fa\u4e00\u689d\u5e73\u6ed1\u7684\u66f2\u7dda\u4f86\u5340\u9694\u5169\u985e\u7684\u8cc7\u6599\u3002 \u63a5\u8457\u6211\u5011\u62ff\u6e2c\u8a66\u8cc7\u6599\u9032\u884c\u6a21\u578b\u9810\u6e2c\uff0c\u53ef\u4ee5\u767c\u73fe\u7531\u65bc\u7d05\u8272\u865b\u7dda\u7684\u6a21\u578b\u5df2\u7d93\u5b8c\u6574\u8a18\u4f4f\u4e86\u8a13\u7df4\u96c6\u7684\u8da8\u52e2\uff0c\u56e0\u6b64\u5728\u65b0\u7684\u6c92\u770b\u904e\u7684\u8cc7\u6599\u8868\u73fe\u5c31\u6c92\u6709\u90a3\u9ebc\u597d\u4e86\u3002\u5c24\u5176\u662f\u5728\u5169\u985e\u5225\u5206\u9694\u7dda\u9644\u8fd1\u7684\u8cc7\u6599\u6700\u80fd\u770b\u51fa\u7aef\u502a\u3002\u65bc\u662f\u6211\u5011\u53ef\u4ee5\u5f88\u78ba\u5b9a\u7d05\u8272\u865b\u7dda\u7684\u6a21\u578b\u5df2\u7d93\u904e\u5ea6\u64ec\u5408\u8a13\u7df4\u8cc7\u6599\u4e86\u3002\u53e6\u5916\u7d05\u8272\u5be6\u73fe\u7684\u6a21\u578b\u96d6\u7136\u5728\u8a13\u7df4\u96c6\u4e2d\u6709\u5e7e\u7b46\u6703\u9810\u6e2c\u932f\u8aa4\uff0c\u4f46\u662f\u5b83\u518d\u6e2c\u8a66\u96c6\u8cc7\u6599\u4e2d\u4e00\u6a23\u4fdd\u6301\u7a69\u5b9a\u7684\u9810\u6e2c\u80fd\u529b\u3002 \u5f9e\u4e0a\u8ff0\u7684\u4f8b\u5b50\u6211\u5011\u53ef\u4ee5\u5f97\u77e5\uff0c\u5728\u8a13\u7df4\u6a21\u578b\u6642\u4e26\u975e\u8a13\u7df4\u96c6\u7684\u8aa4\u5dee\u8d8a\u5c0f\u8d8a\u597d\u3002\u6211\u5011\u5fc5\u9808\u540c\u6642\u62ff\u6e2c\u8a66\u96c6\u9a57\u8b49\u6a21\u578b\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u76ee\u6a19\u662f\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u5e73\u5747\u8aa4\u5dee\u8981\u8d8a\u8fd1\u8d8a\u597d\u3002 
\u4e00\u500b\u9069\u7576\u7684\u6a5f\u5668\u5b78\u7fd2\u5de5\u4f5c\u6d41\u7a0b\u5305\u62ec\uff1a - \u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6 - \u8cc7\u6599\u8996\u89ba\u5316\u8207\u524d\u8655\u7406 - \u5c0b\u627e\u9069\u5408\u7684\u6a21\u578b - \u8abf\u6574\u6a21\u578b\u8d85\u53c3\u6578 - \u4f7f\u7528\u9069\u7576\u7684\u6307\u6a19\u8a55\u4f30\u6a21\u578b - \u4ea4\u53c9\u9a57\u8b49\u6a21\u578b","title":"\u5982\u4f55\u9078\u64c7\u6700\u4f73\u7684\u6a21\u578b\uff1f"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#overfitting-vs-underfitting","text":"\u904e\u5ea6\u64ec\u5408\u7684\u53cd\u7fa9\u5c31\u662f\u6b20\u64ec\u5408\uff0c\u5f9e\u5b57\u9762\u4e0a\u53ef\u4ee5\u5f97\u77e5\u6a21\u578b\u9810\u6e2c\u80fd\u529b\u662f\u4e0d\u597d\u7684\u3002\u7576\u6a21\u578b\u592a\u7c21\u55ae\u6642\u6703\u767c\u751f\u6b20\u64ec\u5408\uff0c\u6216\u662f\u52a0\u5165\u592a\u591a\u7684 L1/L2 \u6b63\u5247\u5316\u9650\u5236\u6a21\u578b\u9810\u6e2c\u80fd\u529b\uff0c\u4f7f\u6a21\u578b\u5728\u5f9e\u6578\u64da\u96c6\u4e2d\u5b78\u7fd2\u6642\u8b8a\u5f97\u4e0d\u9748\u6d3b\u3002\u4e00\u500b\u904e\u65bc\u7c21\u55ae\u7684\u6a21\u578b\u5728\u9810\u6e2c\u4e2d\u5f80\u5f80\u5177\u6709\u8f03\u5c0f\u7684\u65b9\u5dee(variance)\u800c\u5c0e\u81f4\u504f\u5dee(bias)\u5c31\u6703\u8b8a\u5927\u3002\u76f8\u53cd\u7684\u904e\u65bc\u8907\u96dc\u7684\u6a21\u578b\u6703\u6709\u8f03\u7684\u8b8a\u7570\u9032\u800c\u5c0e\u81f4\u65b9\u5dee\u5927\uff0c\u540c\u6642\u504f\u5dee\u6703\u8b8a\u5c0f\u3002\u504f\u5dee\u548c\u65b9\u5dee\u90fd\u662f\u6a5f\u5668\u5b78\u7fd2\u4e2d\u7684\u9810\u6e2c\u8aa4\u5dee\u7684\u65b9\u5f0f\u3002\u5728\u4e00\u822c\u60c5\u6cc1\u4e0b\u6211\u5011\u53ef\u4ee5\u6e1b\u5c11\u504f\u5dee\u6240\u5f15\u8d77\u7684\u8aa4\u5dee\uff0c\u4f46\u53ef\u80fd\u6703\u5c0e\u81f4\u589e\u52a0\u65b9\u5dee\u5f15\u8d77\u7684\u8aa4\u5dee\uff0c\u53cd\u4e4b\u4ea6\u7136\u3002 \u9019\u88e1\u6211\u5011\u5c31\u8981\u4f86\u601d\u8003\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u4e2d\u7684 
error \u5f9e\u4f55\u800c\u4f86\uff1f\u6a21\u578b\u4e2d\u7684 error \u662f\u5224\u65b7\u4e00\u500b\u6a21\u578b\u7684\u597d\u58de\u4f9d\u64da\uff0c\u4f46\u5176\u5be6\u6211\u5011\u53ef\u4ee5\u5c07 error \u62c6\u5206\u6210\u5169\u5927\u90e8\u5206\u3002\u5206\u5225\u6709 Bias \u8207 Variance \u5169\u500b\u90e8\u5206\u3002\u4ee5\u5be6\u969b\u4f8b\u5b50\u4f86\u8aaa\uff0c\u5047\u8a2d\u8f38\u51fa y \u662f\u8f38\u5165 x \u771f\u6b63\u7684\u7b54\u6848\uff0c\u800c \u0177 \u5247\u662f\u900f\u904e\u6a21\u578b f(x) \u8a13\u7df4\u51fa\u4f86\u7684\u9810\u6e2c\u503c\uff0c\u6211\u5011\u5e0c\u671b\u9810\u6e2c\u7684\u7d50\u679c\u8981\u8207\u771f\u5be6\u7b54\u6848\u8d8a\u63a5\u8fd1\u8d8a\u597d\uff0c\u7576 \u0177\u2260y \u6642\u5c31\u6703\u7522\u751f error (\u8aa4\u5dee)\u3002","title":"Overfitting vs. Underfitting"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#bias-variance-tradeoff","text":"\u65b9\u5dee\u8207\u504f\u5dee\u4e4b\u9593\u5b58\u5728\u8457\u4e00\u4e9b\u95dc\u4fc2\uff0c\u6211\u5011\u5fc5\u9808\u5f9e\u4e2d\u627e\u5230\u4e00\u500b\u9069\u7576\u7684\u5e73\u8861\u9ede\u3002\u56e0\u6b64\u6211\u5011\u5e0c\u671b\u900f\u904e\u6b0a\u8861 bias error \u8ddf variance error \u4f86\u4f7f\u5f97\u7e3d\u8aa4\u5dee\u9054\u5230\u6700\u5c0f\u3002\u6211\u5011\u5e38\u6703\u4ee5\u6253\u9776\u4f8b\u5b50\u89e3\u91cb\u65b9\u5dee\u8207\u504f\u5dee\u4e4b\u9593\u7684\u95dc\u806f\u6027\u3002\u5047\u8a2d\u6211\u5011\u767c\u5c04\u5341\u6b21\uff0c\u6211\u5011\u8aac\u4e00\u500b\u4eba\u7684\u6253\u9776\u6280\u8853\u5f88\u7cbe\u6e96\u3002\u5176\u4e2d\u7684 \u7cbe \u5c31\u8868\u793a\u9019\u5341\u500b\u628a\u9762\u4e0a\u7684\u9ede\u5f7c\u6b64\u9593\u8ddd\u96e2\u90fd\u76f8\u7576\u8fd1\uff0c\u4e5f\u5c31\u662f\u6211\u7684\u65b9\u5dee\u975e\u5e38\u4f4e(low variance)\u3002\u53e6\u5916\u6240\u8b02\u7684 \u6e96 \u5c31\u8868\u793a\u9019\u5341\u500b\u9ede\u90fd\u96e2\u6e96\u5fc3\u5f88\u8fd1\uff0c\u4e5f\u5c31\u662f\u6211\u5011\u7684\u504f\u5dee\u975e\u5e38\u4f4e(low bias)\u3002 
Underfitting: \u904e\u65bc\u7c21\u55ae\u7684\u6a21\u578b\u4f7f\u5f97\u9810\u6e2c\u7d50\u679c\u5f48\u6027\u4e0d\u9ad8\uff0c\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u8868\u73fe\u90fd\u4e0d\u597d\u3002low variance (high bias)\u3002 Overfitting: \u904e\u65bc\u8907\u96dc\u7684\u6a21\u578b\u4f7f\u5f97\u8a13\u7df4\u96c6\u5b8c\u6574\u7684\u88ab\u64ec\u5408\uff0c\u56e0\u6b64\u8a13\u7df4\u96c6\u8868\u73fe\u6975\u597d\uff0c\u4f46\u6e2c\u8a66\u96c6\u8868\u73fe\u4e0d\u4f73\u3002high variance (low bias)\u3002","title":"Bias-Variance Tradeoff"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#error-from-bias","text":"\u504f\u5dee(bias)\u5c31\u662f\u6a21\u578b\u7684\u9810\u6e2c\u8207\u771f\u5be6\u503c\u4e4b\u9593\u7684\u5dee\u7570\u3002\u4e00\u822c\u6211\u5011\u8a13\u7df4\u6a21\u578b\u662f\u671f\u671b\u9810\u6e2c\u7684\u503c\u8981\u8207\u5be6\u969b\u7684\u7b54\u6848\u8981\u8d8a\u63a5\u8fd1\u8d8a\u597d\u3002\u7136\u800c\u7576\u4e00\u500b\u7c21\u55ae\u7684\u7dda\u6027\u6a21\u578b\u53ef\u80fd\u7121\u6cd5\u5b8c\u6574\u5730\u64ec\u5408\u5230\u4e00\u500b\u8907\u96dc\u975e\u7dda\u6027\u7684\u8cc7\u6599\u96c6\u3002\u56e0\u6b64\u5982\u4e0b\u5716\u6240\u793a\uff0c\u7576\u4e00\u500b\u6a21\u578b\u8a13\u7df4\u7d50\u679c\u504f\u5dee\u904e\u5927\u6211\u5011\u53ef\u4ee5\u5f97\u77e5\u8a72\u6a21\u578b\u904e\u65bc\u7c21\u55ae\u3002\u7121\u8ad6\u641c\u96c6\u518d\u591a\u7684\u8cc7\u6599\uff0c\u7dda\u6027\u7684\u6a21\u578b\u6c38\u9060\u7121\u6cd5\u64ec\u5408\u975e\u7dda\u6027\u7684\u66f2\u7dda\u3002\u56e0\u70ba\u6bd4\u8f03\u7c21\u55ae\u7684\u6a21\u578b\uff0c\u4ed6\u53d7\u5230\u4e0d\u540c\u7684\u8cc7\u6599\u7684\u5f71\u97ff\u662f\u6bd4\u8f03\u5c0f\u7684\u3002 \u7c21\u55ae\u7684\u6a21\u578b\u6709\u5927\u7684 bias\uff0c\u5c0f\u7684 variance\u3002 Error \u4f86\u81ea\u65bc bias \u5f88\u5927\uff0c\u7a31\u70ba\u6b20\u64ec\u5408\u3002","title":"Error from 
Bias"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#error-from-variance","text":"\u65b9\u5dee(variance)\u662f\u6307\u4f60\u7684\u6a21\u578b\u5c0d\u65bc\u8cc7\u6599\u96c6\u7684\u654f\u611f\u7a0b\u5ea6\u3002\u4e00\u500b\u904e\u65bc\u8907\u96dc\u7684\u6a21\u578b\u6703\u5c0e\u81f4\u8f38\u51fa\u7684\u8b8a\u7570\u6027\u975e\u5e38\u5927\u3002\u6a21\u578b\u6b7b\u80cc\u6240\u6709\u8a13\u7df4\u96c6\u4e2d\u7684\u6578\u64da\u9ede\u6703\u5c0e\u81f4\u4e00\u500b\u554f\u984c\u767c\u751f\u3002\u7576\u4f60\u7684\u8a13\u7df4\u8cc7\u6599\u6709\u8a31\u591a\u7684\u96a8\u6a5f\u8aa4\u5dee\u6216\u662f\u96e2\u7fa4\u503c\u6642\uff0c\u6211\u5011\u53c8\u628a\u9019\u4e9b\u7570\u5e38\u503c\u5168\u90e8\u64ec\u5408\u9032\u6a21\u578b\u88e1\u9762\uff0c\u5c0e\u81f4\u5b78\u51fa\u4f86\u7684\u6a21\u578b\u904e\u65bc\u8907\u96dc\u540c\u6642\u964d\u4f4e\u6cdb\u5316\u80fd\u529b\uff0c\u5c0d\u65bc\u672a\u77e5\u7684\u8cc7\u6599\u9810\u6e2c\u7684\u80fd\u529b\u5c31\u6703\u5f88\u5dee\uff0c\u540c\u6642\u9020\u5c31\u4e86\u5f88\u9ad8\u7684 variance error\u3002\u56e0\u6b64\u9019\u6a23\u7684\u7d50\u679c\u6211\u5011\u7a31\u70ba\u904e\u5ea6\u64ec\u5408\u3002 \u8f03\u8907\u96dc\u7684\u6a21\u578b\u6709\u5c0f\u7684 bias\uff0c\u5927\u7684 variance\u3002 Error \u4f86\u81ea\u65bc variance \u5f88\u5927\uff0c\u7a31\u70ba\u904e\u5ea6\u64ec\u5408\u3002","title":"Error from Variance"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#_4","text":"\u901a\u5e38 bias 
\u5927\u800c\u5c0e\u81f4\u6a21\u578b\u904e\u65bc\u7c21\u55ae\uff0c\u800c\u7121\u6cd5\u64ec\u5408\u8a13\u7df4\u8cc7\u6599\u3002\u6211\u5011\u53ef\u4ee5\u8a66\u8457\u589e\u52a0\u8f38\u5165\u7684\u7279\u5fb5\uff0c\u4e26\u505a\u4e00\u4e9b\u7279\u5fb5\u5de5\u7a0b\u8b93\u6a21\u578b\u89c0\u5bdf\u591a\u9ede\u7dda\u7d22\u3002\u6216\u662f\u8abf\u6574\u6a21\u578b\u7684\u6f14\u7b97\u6cd5\uff0c\u4f7f\u6a21\u578b\u66f4\u8907\u96dc\u3002\u4f8b\u5982\u4f7f\u7528\u9805\u6b21\u66f4\u9ad8\u7684\u591a\u9805\u5f0f\u6a21\u578b\uff0c\u6216\u662f tree-based \u6a21\u578b\u4e2d\u9069\u7576\u7684\u589e\u52a0\u6a39\u7684\u6df1\u5ea6......\u7b49\u3002\u9019\u88cf\u66f4\u503c\u5f97\u4e00\u63d0\u7684\u662f\uff0c\u7576\u6a21\u578b\u6b20\u64ec\u5408\u6642\u641c\u96c6\u518d\u591a\u7684\u8a13\u7df4\u8cc7\u6599\u662f\u6c92\u6709\u7528\u7684\u3002\u56e0\u70ba\u7c21\u55ae\u7684\u6a21\u578b\u6bd4\u8f03\u4e0d\u6703\u53d7\u8cc7\u6599\u7684\u5f71\u97ff\uff0c\u6240\u4ee5 variance \u76f8\u5c0d\u7684\u6703\u6bd4\u8f03\u4f4e\u800c bias \u5927\uff0c\u4e5f\u5c31\u662f\u8f38\u51fa\u7684\u8b8a\u5316\u6027\u4e0d\u5927\u3002\u5f9e\u9019\u88e1\u6211\u5011\u53ef\u4ee5\u5f97\u77e5\u7c21\u55ae\u7684\u6a21\u578b\u53d7\u5230\u4e0d\u540c\u7684\u8f38\u5165\u8cc7\u6599\u53d7\u5230\u7684\u5f71\u97ff\u662f\u6bd4\u8f03\u5c0f\u7684\u3002\u56e0\u70ba\u6a21\u578b\u9078\u5f97\u4e0d\u597d\uff0c\u518d\u600e\u9ebc\u8a13\u7df4\u4ed6\u7684 bias \u9084\u662f\u4e00\u6a23\u5927\u3002 \u589e\u52a0\u8f38\u5165\u7279\u5fb5\u6216\u7279\u5fb5\u5de5\u7a0b 
\u63d0\u9ad8\u6a21\u578b\u8907\u96dc\u5ea6","title":"\u5982\u4f55\u907f\u514d\u6b20\u64ec\u5408\uff1f"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#_5","text":"\u7576\u6a21\u578b\u904e\u65bc\u8907\u96dc\u904e\u5ea6\u64ec\u5408\u767c\u751f\u7684\u6a5f\u7387\u76f8\u5c0d\u63d0\u9ad8\uff0c\u6211\u5011\u53ef\u4ee5\u5f9e\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u89c0\u5bdf\uff0c\u5f88\u5bb9\u6613\u5730\u6aa2\u6e2c\u6a21\u578b\u662f\u5426\u904e\u5ea6\u64ec\u5408\u3002\u4f46\u662f\u6211\u5011\u61c9\u8a72\u5982\u4f55\u907f\u514d\u6a21\u578b\u592a\u904e\u65bc\u8907\u96dc\uff0c\u800c\u5c0e\u81f4\u904e\u5ea6\u64ec\u5408\u767c\u751f\u5462\uff1f\u901a\u5e38\u6211\u5011\u6703\u8a3a\u65b7\u9019\u4e9b\u932f\u8aa4\u7684\u4f86\u6e90\uff0c\u9019\u4e9b\u932f\u8aa4\u4f86\u81ea\u65bc\u5169\u7a2e\uff0c\u5206\u5225\u70ba\u6709 bias \u8207 variance\u3002\u5982\u679c\u6211\u5011\u80fd\u5920\u8a3a\u65b7\u51fa\u9019\u4e9b\u932f\u8aa4\u7684\u4f86\u6e90\uff0c\u6211\u5011\u5c31\u80fd\u6311\u51fa\u9069\u7576\u7684\u65b9\u6cd5\u4f86\u6539\u5584\u6a21\u578b\u3002\u4ee5\u4e0b\u5e7e\u9ede\u6216\u8a31\u80fd\u5920\u5e6b\u52a9\u4f60\u9032\u884c\u5efa\u6a21\uff1a \u641c\u96c6\u66f4\u591a\u8a13\u7df4\u8cc7\u6599 \u589e\u52a0\u8a13\u7df4\u96c6\u7684\u8cc7\u6599\u91cf\u662f\u6709\u6548\u63a7\u5236 variance \u7684\u65b9\u6cd5\uff0c\u4e26\u4e14\u4e0d\u6703\u589e\u52a0 bias\u3002 \u6a21\u578b\u6dfb\u52a0 Regularization \u5728\u640d\u5931\u51fd\u6578\u4e2d\u589e\u52a0\u4e00\u4e9b\u9650\u5236\u5f0f\uff0c\u964d\u4f4e\u6a21\u578b\u8907\u96dc\u3002 \u4ea4\u53c9\u9a57\u8b49 \u5f9e\u8a13\u7df4\u96c6\u4e2d\u5207\u51fa\u9a57\u8b49\u96c6\uff0c\u4e26\u6311\u51fa\u597d\u7684\u6a21\u578b\u3002\u800c\u4e0d\u662f\u5f9e\u6e2c\u8a66\u96c6\u4e2d\u6c42\u6700\u5c0f error\u3002 Early Stopping \u8a2d\u5b9a\u7576\u6a21\u578b\u9023\u7e8c\u5e7e\u4ee3\u90fd\u7121\u6cd5\u6539\u5584 error\uff0c\u5c31\u7acb\u5373\u7d42\u6b62\u8a13\u7df4\u3002 Ensembling 
\u900f\u904e\u8a13\u7df4\u591a\u500b\u6a21\u578b\uff0c\u4e26\u53d6\u5f97\u6bcf\u500b\u6a21\u578b\u9810\u6e2c\u4e26\u5e73\u5747\u4f5c\u70ba\u6700\u7d42\u8f38\u51fa\u3002","title":"\u5982\u4f55\u907f\u514d\u904e\u5ea6\u64ec\u5408\uff1f"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#reference","text":"Overfitting in Machine Learning: What It Is and How to Prevent It WTF is the Bias-Variance Tradeoff? (Infographic) \u3010\u6a5f\u5668\u5b78\u7fd2\u3011\u504f\u5dee\u8207\u65b9\u5dee\u4e4b\u6b0a\u8861 Bias-Variance Tradeoff \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb/","text":"[Day 25] \u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u5e38\u898b\u7684\u4ea4\u53c9\u9a57\u8b49\u65b9\u6cd5 K-fold Leave one out cross validation Random Subsampling Bootstrap \u524d\u8a00 
\u70ba\u4e86\u907f\u514d\u6a21\u578b\u8a13\u7df4\u767c\u751f\u904e\u5ea6\u64ec\u5408\uff0c\u901a\u5e38\u6211\u5011\u9084\u6703\u5f9e\u8a13\u7df4\u96c6\u5207\u4e00\u5c0f\u90e8\u5206\u8cc7\u6599\u51fa\u4f86\u9032\u884c\u9a57\u8b49\u3002\u9a57\u8b49\u96c6\u7684\u7528\u8655\u5247\u662f\u7528\u4f86\u6aa2\u8996\u6a21\u578b\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u6bcf\u6b21\u7684\u8fed\u4ee3\u7d50\u679c\u8a13\u7df4\u7684\u597d\u4e0d\u597d\u3002\u4f46\u8a72\u5982\u4f55\u5207\u51fa\u9019\u500b\u9a57\u8b49\u96c6\u6bd4\u8f03\u6709\u516c\u4fe1\u529b\u5462\uff1f\u5982\u679c\u6211\u5011\u50c5\u5207\u4e00\u5c0f\u4efd\u7684\u8cc7\u6599\u4ed6\u662f\u80fd\u6709\u6548\u7684\u8a55\u4f30\u8a13\u7df4\u6642\u6a21\u578b\u7684\u597d\u58de\u55ce\uff1f\u5728\u67d0\u4e9b\u60c5\u6cc1\u5e95\u4e0b\u55ae\u7d14\u76f4\u63a5\u5f9e\u8cc7\u6599\u96c6\u88e1\u9762\u5207\u4e00\u584a\u51fa\u4f86\u7576\u9a57\u8b49\u96c6\uff0c\u662f\u6c92\u6709\u8fa6\u6cd5\u5f88\u6709\u6548\u7684\u53bb\u8a55\u4f30\u4e00\u500b\u6a21\u578b\u8a13\u7df4\u7684\u597d\u58de\u3002\u8aaa\u4e0d\u5b9a\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u5728\u9019\u4e00\u4efd\u9a57\u8b49\u96c6\u6070\u597d\u8868\u73fe\u5f97\u4e0d\u932f\uff0c\u5982\u679c\u53c8\u96a8\u6a5f\u62bd\u53e6\u4e00\u4efd\u8cc7\u6599\u4f86\u7576\u9a57\u8b49\u96c6\u8aaa\u4e0d\u5b9a\u7d50\u679c\u6703\u8b8a\u5f97\u5f88\u7cdf\u7cd5\u3002\u9019\u5c31\u8868\u793a\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u4e0d\u8db3\u3002\u70ba\u4e86\u907f\u514d\u9019\u7a2e\u60c5\u6cc1\u767c\u751f\u4e26\u4e14\u6709\u6548\u7684\u5207\u5272\u9a57\u8b49\u96c6\u4f86\u8a55\u4f30\u6a21\u578b\uff0c\u6211\u5011\u53ef\u4ee5\u63a1\u7528\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7684\u6280\u5de7\u4f86\u7372\u5f97\u6700\u4f73\u9a57\u8b49\u3002 \u4ec0\u9ebc\u662f\u4ea4\u53c9\u9a57\u8b49\uff1f 
\u5728\u89e3\u91cb\u4ea4\u53c9\u9a57\u8b49\u4e4b\u524d\u6211\u5011\u5148\u4f86\u8a0e\u8ad6\u5c07\u8cc7\u6599\u96c6\u5207\u5206\u70ba\u8a13\u7df4\u96c6\u3001\u6e2c\u8a66\u96c6\u548c\u9a57\u8b49\u96c6\u7684\u554f\u984c\u3002\u5728\u4e00\u822c\u72c0\u6cc1\u4e0b\u6211\u5011\u6703\u5c07\u8cc7\u6599\u5148\u5207\u5272\u6210\u5169\u7b49\u4efd\uff0c\u5206\u5225\u70ba\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u3002\u5176\u4e2d\u5728\u8a13\u7df4\u968e\u6bb5\u6a21\u578b\u53ea\u6703\u5c0d\u8a13\u7df4\u96c6\u9032\u884c\u64ec\u5408\uff0c\u53e6\u5916\u6e2c\u8a66\u96c6\u7684\u8cc7\u6599\u4e26\u672a\u53c3\u8207\u8a13\u7df4\uff0c\u56e0\u6b64\u53ef\u4ee5\u62ff\u4f86\u7576\u4f5c\u6700\u7d42\u8a55\u4f30\u6a21\u578b\u7684\u597d\u58de\u3002\u4f46\u662f\u6211\u5011\u8a13\u7df4\u7684\u6a21\u578b\u5e0c\u671b\u627e\u5230\u4e00\u500b\u4e0d\u932f\u7684\u8d85\u53c3\u6578\uff0c\u4f7f\u5f97\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u90fd\u6709\u4e0d\u932f\u7684\u6210\u7e3e\uff0c\u4e5f\u5c31\u662f\u8aaa loss 
\u8981\u8d8a\u4f4e\u8d8a\u597d\u3002\u56e0\u6b64\u6700\u5e38\u898b\u7684\u4f5c\u6cd5\u6703\u5c07\u8a13\u7df4\u8cc7\u6599\u518d\u5207\u51fa\u4e00\u500b\u9a57\u8b49\u96c6\u4f86\u627e\u51fa\u4e00\u500b\u6700\u4f73\u7684\u6a21\u578b\u53c3\u6578\uff0c\u4f7f\u5f97\u9a57\u8b49\u96c6\u7684\u8868\u73fe\u8981\u6700\u597d\u3002\u4f46\u662f\u70ba\u4e86\u907f\u514d\u6a21\u578b\u5c0d\u65bc\u6211\u5011\u6240\u5207\u7684\u9a57\u8b49\u96c6\u904e\u5ea6\u64ec\u5408\uff0c\u56e0\u6b64\u53ef\u4ee5\u900f\u904e\u4ea4\u53c9\u9a57\u8b49\u7684\u65b9\u6cd5\u5c0d\u6a21\u578b\u505a\u66f4\u597d\u7684\u8a55\u4f30\u3002\u6240\u8b02\u7684\u4ea4\u53c9\u9a57\u8b49\u7c21\u55ae\u4f86\u8aaa\u662f\u5c07\u8a13\u7df4\u8cc7\u6599\u9032\u884c\u5206\u7d44\uff0c\u4e00\u90e8\u5206\u505a\u70ba\u8a13\u7df4\u5b50\u96c6\u4f86\u8a13\u7df4\u6a21\u578b\uff0c\u53e6\u4e00\u90e8\u5206\u505a\u70ba\u9a57\u8b49\u5b50\u96c6\u4f86\u8a55\u4f30\u6a21\u578b\u3002\u7528\u8a13\u7df4\u5b50\u96c6\u7684\u6578\u64da\u5148\u8a13\u7df4\u6a21\u578b\uff0c\u7136\u5f8c\u7528\u9a57\u8b49\u5b50\u96c6\u53bb\u8dd1\u4e00\u904d\uff0c\u770b\u9a57\u8b49\u96c6\u7684\u640d\u5931\u51fd\u6578(loss)\u6216\u662f\u5206\u985e\u6e96\u78ba\u7387\u7b49\u3002\u7b49\u6a21\u578b\u8a13\u7df4\u597d\u4e4b\u5f8c\uff0c\u518d\u7528\u6e2c\u8a66\u96c6\u53bb\u6e2c\u8a66\u6a21\u578b\u7684\u6027\u80fd\u3002\u4e3b\u8981\u7684\u4ea4\u53c9\u9a57\u8b49\u6cd5\u6709\u4ee5\u4e0b\u5e7e\u500b\u65b9\u6cd5: Holdout K-fold Leave one out cross validation Random Subsampling Bootstrap Holdout Method \u6b64\u65b9\u6cd5\u662f\u6700\u7d93\u5178\u4e14\u6700\u7c21\u55ae\u5be6\u4f5c\u7684\u4ea4\u53c9\u9a57\u8b49\u6cd5\uff0cHoldout 
\u9867\u540d\u601d\u7fa9\u5c31\u662f\u5c07\u8cc7\u6599\u5207\u51fa\u4e00\u90e8\u5206\u4f5c\u70ba\u6a21\u578b\u8a55\u4f30\u7684\u4f9d\u64da\u3002\u5728\u9019\u7a2e\u65b9\u6cd5\u4e2d\uff0c\u6211\u5011\u5c07\u8cc7\u6599\u96a8\u6a5f\u5206\u70ba\u4e09\u90e8\u5206\uff1a\u8a13\u7df4\u96c6\u3001\u9a57\u8b49\u96c6\u548c\u6e2c\u8a66\u96c6\u3002\u5176\u4e2d\u53ea\u6709\u8a13\u7df4\u96c6\u8cc7\u6599\u5be6\u969b\u53c3\u8207\u8a13\u7df4\uff0c\u5176\u9918\u7684\u8cc7\u6599\u50c5\u62ff\u4f86\u8a55\u4f30\u6a21\u578b\u597d\u58de\u3002\u9a57\u8b49\u96c6\u4f7f\u7528\u6642\u6a5f\u662f\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u53ef\u4ee5\u6aa2\u8996\u8a13\u7df4\u7684\u8da8\u52e2\uff0c\u82e5\u6709\u767c\u73fe\u904e\u64ec\u5408\u8de1\u8c61\u53ef\u4ee5\u63d0\u65e9\u767c\u73fe\u4e26\u89e3\u6c7a\u3002\u4ee5\u53ca\u65b9\u4fbf\u6211\u5011\u9032\u884c\u8abf\u6574\u8d85\u53c3\u6578\u4ee5\u53ca\u9078\u64c7\u6700\u4f73\u7684\u6a21\u578b\u3002\u7576\u7136\u50c5\u900f\u904e\u9a57\u8b49\u96c6\u4e0d\u80fd\u4ee3\u8868\u5168\u90e8\uff0c\u56e0\u6b64\u6700\u5f8c\u78ba\u5b9a\u597d\u6a21\u578b\u6642\u3002\u6211\u5011\u6703\u518d\u62ff\u4e8b\u5148\u5207\u597d\u7684\u6e2c\u8a66\u96c6\u9032\u884c\u6700\u7d42\u7684\u8a55\u4f30\uff0c\u6aa2\u8996\u6a21\u578b\u7684\u6cdb\u5316\u80fd\u529b\u3002 \u53c3\u8003 \u512a\u9ede: 1. \u7c21\u55ae\u5be6\u4f5c\u3002 2. \u9a57\u8b49\u96c6\u53ef\u4ee5\u88ab\u62ff\u4f86\u8a55\u4f30\u6a21\u578b\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u7684\u5b78\u7fd2\u6210\u679c\u3002 3. \u6e2c\u8a66\u96c6\u53ef\u4ee5\u8a55\u4f30\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u3002 \u7f3a\u9ede: 1. \u7576\u8cc7\u6599\u96c6\u8b8a\u7570\u91cf\u8f03\u5927\u6642\uff0c\u9a57\u8b49\u96c6\u8207\u6e2c\u8a66\u96c6\u53ef\u80fd\u7121\u6cd5\u8db3\u4ee5\u8a55\u4f30\u6a21\u578b\u3002 2. 
\u4e0d\u9069\u5408\u7528\u5728\u8cc7\u6599\u4e0d\u5e73\u8861\u7684\u8cc7\u6599\u96c6\u3002 K-fold Cross-Validation \u4e0a\u4e00\u500b\u65b9\u6cd5\u96d6\u7136\u7c21\u55ae\uff0c\u4f46\u662f\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u50c5\u5207\u4e00\u4efd\u9a57\u8b49\u96c6\u5f80\u5f80\u4e0d\u80fd\u5920\u4ee3\u8868\u5168\u90e8\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u900f\u904e\u4e00\u4e9b\u6280\u5de7\u5207\u5272\u9a57\u8b49\u96c6\uff0c\u4f7f\u5f97\u8a13\u7df4\u904e\u7a0b\u4e2d\u6709\u4e00\u500b\u66f4\u516c\u6b63\u7684\u8a55\u4f30\u65b9\u5f0f\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e K-Fold \u65b9\u6cd5\u5c07\u8a13\u7df4\u8cc7\u6599\u518d\u4f9d\u5e8f\u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\uff0cK-Fold \u88e1\u9762\u7684\u6e2c\u8a66\u96c6\u53ef\u4ee5\u7576\u6210\u9a57\u8b49\u96c6\u3002K-Fold \u7684\u65b9\u6cd5\u4e2d K \u662f\u7531\u6211\u5011\u81ea\u7531\u8abf\u63a7\u7684\uff0c\u5728\u6bcf\u6b21\u7684\u8fed\u4ee3\u4e2d\u6703\u9078\u64c7\u4e00\u7d44\u4f5c\u70ba\u9a57\u8b49\u96c6\uff0c\u5176\u9918 (k-1) \u7d44\u4f5c\u70ba\u8a13\u7df4\u96c6\u3002\u900f\u904e\u9019\u7a2e\u65b9\u5f0f\u5b78\u7fd2\uff0c\u4e0d\u540c\u5206\u7d44\u8a13\u7df4\u7684\u7d50\u679c\u9032\u884c\u5e73\u5747\u4f86\u6e1b\u5c11\u65b9\u5dee\uff0c\u56e0\u6b64\u6a21\u578b\u7684\u6027\u80fd\u5c0d\u6578\u64da\u7684\u5283\u5206\u5c31\u4e0d\u6703\u90a3\u9ebc\u654f\u611f\u3002 \u53c3\u8003 \u512a\u9ede: 1. \u964d\u4f4e\u6a21\u578b\u8a13\u7df4\u5c0d\u65bc\u8cc7\u6599\u96c6\u7684\u504f\u5dee\u3002 2. \u8a13\u7df4\u96c6\u8207\u9a57\u8b49\u96c6\u5b8c\u6574\u88ab\u5145\u5206\u5229\u7528\u8207\u5b78\u7fd2\u3002 \u7f3a\u9ede: 1. \u4e0d\u9069\u5408\u7528\u65bc\u8cc7\u6599\u4e0d\u5e73\u8861\u7684\u8cc7\u6599\u96c6\u3002 2. 
\u5982\u679c\u8981\u7c21\u55ae\u7684 K-fold \u4f86\u5c0b\u627e\u8d85\u53c3\u6578\u6703\u6709\u8cc7\u6599\u6d29\u6f0f\u554f\u984c\u5c0e\u81f4\u8a13\u7df4\u7d50\u679c\u6709\u504f\u5dee\uff0c\u56e0\u70ba\u5728\u6bcf\u500b Fold \u4e2d\u90fd\u6703\u4f7f\u7528\u540c\u4e00\u7d44\u8cc7\u6599\u9032\u884c\u9a57\u8b49\u3002 3. \u5728\u76f8\u540c\u7684\u9a57\u8b49\u96c6\u8a08\u7b97\u6a21\u578b\u7684\u8aa4\u5dee\uff0c\u7576\u627e\u5230\u4e86\u6700\u4f73\u7684\u8d85\u53c3\u6578\u3002\u9019\u53ef\u80fd\u6703\u5c0e\u81f4\u91cd\u5927\u504f\u5dee\uff0c\u6709\u904e\u64ec\u5408\u7591\u616e\u3002 Leave One Out \u6b64\u65b9\u6cd5\u662f K-fold \u5176\u4e2d\u4e00\u7a2e\u7279\u4f8b\uff0c\u7576 K \u7b49\u65bc\u8cc7\u6599\u96c6\u7684\u6578\u91cf\u6642\u5c31\u7b49\u65bc Leave One Out \u65b9\u6cd5\u3002\u4e5f\u5c31\u662f\u5728\u6bcf\u6b21\u8a13\u7df4\u6642\u50c5\u6703\u628a\u4e00\u7b46\u8cc7\u6599\u7576\u6210\u6e2c\u8a66\u8cc7\u6599\uff0c\u5176\u9918\u7684 N-1 \u7b46\u8cc7\u6599\u4f5c\u70ba\u8a13\u7df4\u6a21\u578b\u7684\u8cc7\u6599\u3002\u6b64\u4f5c\u6cd5\u76f8\u7576\u7c21\u55ae\u660e\u77ad\uff0c\u4f46\u662f\u8a13\u7df4\u8ca0\u64d4\u6703\u975e\u5e38\u91cd\u4e14\u8017\u6642\u3002\u7136\u800c Leave p-out \u662f\u53e6\u4e00\u7a2e\u6280\u5de7\uff0c\u5176\u4e2d\u7684 p \u4f7f\u7528\u8005\u53ef\u4ee5\u81ea\u5df1\u8a2d\u5b9a\u6bcf\u6b21\u8a13\u7df4\u9700\u8981\u7559\u5e7e\u7b46\u8cc7\u6599\u4f5c\u70ba\u6e2c\u8a66\u96c6\u3002 \u512a\u9ede: 1. \u7c21\u55ae\u4e14\u5bb9\u6613\u7406\u89e3\uff0c\u597d\u5be6\u4f5c\u3002 \u7f3a\u9ede: 1. 
\u9700\u8981\u82b1\u8cbb\u66f4\u591a\u7684\u8a13\u7df4\u6642\u9593\u3002 Random Subsampling Bootstrapping \u9084\u6709\u4e00\u7a2e\u6bd4\u8f03\u7279\u6b8a\u7684\u4ea4\u53c9\u9a57\u8b49\u65b9\u5f0f\uff0cBootstrapping \u81ea\u52a9\u62bd\u6a23\u6cd5\u3002\u662f\u4e00\u7a2e\u5f9e\u7d66\u5b9a\u8a13\u7df4\u96c6\u4e2d\u6709\u653e\u56de\u7684\u5747\u52fb\u62bd\u6a23\uff0c\u4e5f\u5c31\u662f\u8aaa\uff0c\u6bcf\u7576\u9078\u4e2d\u4e00\u500b\u6a23\u672c\uff0c\u5b83\u7b49\u53ef\u80fd\u5730\u88ab\u518d\u6b21\u9078\u4e2d\u4e26\u88ab\u518d\u6b21\u6dfb\u52a0\u5230\u8a13\u7df4\u96c6\u4e2d\u3002\u5047\u8a2d\u6bcf\u6b21\u8a13\u7df4\u90fd\u63a1\u6a23\u5341\u500b\u6a23\u672c\uff0c\u5728\u9019\u5341\u7b46\u8cc7\u6599\u4e2d\u5f88\u6709\u53ef\u80fd\u6703\u518d\u6b21\u88ab\u96a8\u6a5f\u62bd\u5230\u3002\u5269\u4e0b\u6c92\u6709\u62bd\u5230\u7684\u8cc7\u6599\u5247\u90fd\u8b8a\u6210\u6e2c\u8a66\u96c6\uff0c\u7528\u4f86\u8a55\u4f30\u8a13\u7df4\u5b8c\u7684\u6a21\u578b\u3002 \u5c0f\u7d50 \u4ea4\u53c9\u9a57\u8b49\u662f\u8a13\u7df4\u6a21\u578b\u4e2d\u975e\u5e38\u91cd\u8981\u7684\u6280\u5de7\uff0c\u5c24\u5176\u662f\u7576\u624b\u908a\u7684\u8cc7\u6599\u96c6\u6709\u9650\u6642\u66f4\u61c9\u8a72\u4f7f\u7528\u3002\u900f\u904e\u4ea4\u53c9\u9a57\u8b49\u6280\u5de7\uff0c\u5373\u4f7f\u5728\u6578\u64da\u6709\u9650\u7684\u60c5\u6cc1\u4e0b\uff0c\u6211\u5011\u4e5f\u80fd\u5920\u7372\u5f97\u6e96\u78ba\u7684\u7d50\u679c\uff0c\u4e26\u4e14\u53ef\u4ee5\u907f\u514d\u6a21\u578b\u904e\u5ea6\u64ec\u5408\u3002\u4e26\u70ba\u6211\u5011\u63d0\u4f9b\u66f4\u6e96\u78ba\u7684\u6a21\u578b\u9810\u6e2c\u6027\u80fd\u4f30\u8a08\u65b9\u5f0f\uff0c\u540c\u6642\u4e5f\u80fd\u5920\u63d0\u5347\u6a21\u578b\u7684\u6cdb\u5316\u80fd\u529b\u3002\u4ee5\u4e0a\u7684\u65b9\u6cd5\u53ef\u4ee5\u76f4\u63a5\u4f7f\u7528 scikit-learn \u88e1\u9762 model_selection \u5e95\u4e0b\u7684 cross_val_score \u65b9\u6cd5\u9032\u884c\u5be6\u4f5c\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub 
\u53d6\u5f97\uff01","title":"[Day 25] \u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb/#day-25-cross-validation","text":"","title":"[Day 25] \u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb/#_1","text":"\u5e38\u898b\u7684\u4ea4\u53c9\u9a57\u8b49\u65b9\u6cd5 K-fold Leave one out cross validation Random Subsampling Bootstrap","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb/#_2","text":"\u70ba\u4e86\u907f\u514d\u6a21\u578b\u8a13\u7df4\u767c\u751f\u904e\u5ea6\u64ec\u5408\uff0c\u901a\u5e38\u6211\u5011\u9084\u6703\u5f9e\u8a13\u7df4\u96c6\u5207\u4e00\u5c0f\u90e8\u5206\u8cc7\u6599\u51fa\u4f86\u9032\u884c\u9a57\u8b49\u3002\u9a57\u8b49\u96c6\u7684\u7528\u8655\u5247\u662f\u7528\u4f86\u6aa2\u8996\u6a21\u578b\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u6bcf\u6b21\u7684\u8fed\u4ee3\u7d50\u679c\u8a13\u7df4\u7684\u597d\u4e0d\u597d\u3002\u4f46\u8a72\u5982\u4f55\u5207\u51fa\u9019\u500b\u9a57\u8b49\u96c6\u6bd4\u8f03\u6709\u516c\u4fe1\u529b\u5462\uff1f\u5982\u679c\u6211\u5011\u50c5\u5207\u4e00\u5c0f\u4efd\u7684\u8cc7\u6599\u4ed6\u662f\u80fd\u6709\u6548\u7684\u8a55\u4f30\u8a13\u7df4\u6642\u6a21\u578b\u7684\u597d\u58de\u55ce\uff1f\u5728\u67d0\u4e9b\u60c5\u6cc1\u5e95\u4e0b\u55ae\u7d14\u76f4\u63a5\u5f9e\u8cc7\u6599\u96c6\u88e1\u9762\u5207\u4e00\u584a\u51fa\u4f86\u7576\u9a57\u8b49\u96c6\uff0c\u662f\u6c92\u6709\u8fa6\u6cd5\u5f88\u6709\u6548\u7684\u53bb\u8a55\u4f30\u4e00\u500b\u6a21\u578b\u8a13\u7df4\u7684\u597d\u58de\u3002\u8aaa\u4e0d\u5b9a\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u5728\u9019\u4e00\u4efd\u9a57\u8b49\u96c6\u6070\u597d\u8868\u73fe\u5f97\u4e0d\u932f\uff0c\u5982\u679c\u53c8\u96a8\u6a5f\u62bd\u53e6\u4e00\u4efd\u8cc7\u6599\u4f86\u7576\u9a57\u8b49\u96c6\u8aaa\u4e0d\u5b9a\u7d50\u679c\u6703\u8b8a\u5f97\u5f88\u7cdf\u7cd5\u3002\u9019\u5c31\u8868\u793a\u6a21\u57
8b\u6cdb\u5316\u80fd\u529b\u4e0d\u8db3\u3002\u70ba\u4e86\u907f\u514d\u9019\u7a2e\u60c5\u6cc1\u767c\u751f\u4e26\u4e14\u6709\u6548\u7684\u5207\u5272\u9a57\u8b49\u96c6\u4f86\u8a55\u4f30\u6a21\u578b\uff0c\u6211\u5011\u53ef\u4ee5\u63a1\u7528\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7684\u6280\u5de7\u4f86\u7372\u5f97\u6700\u4f73\u9a57\u8b49\u3002","title":"\u524d\u8a00"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb/#_3","text":"\u5728\u89e3\u91cb\u4ea4\u53c9\u9a57\u8b49\u4e4b\u524d\u6211\u5011\u5148\u4f86\u8a0e\u8ad6\u5c07\u8cc7\u6599\u96c6\u5207\u5206\u70ba\u8a13\u7df4\u96c6\u3001\u6e2c\u8a66\u96c6\u548c\u9a57\u8b49\u96c6\u7684\u554f\u984c\u3002\u5728\u4e00\u822c\u72c0\u6cc1\u4e0b\u6211\u5011\u6703\u5c07\u8cc7\u6599\u5148\u5207\u5272\u6210\u5169\u7b49\u4efd\uff0c\u5206\u5225\u70ba\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u3002\u5176\u4e2d\u5728\u8a13\u7df4\u968e\u6bb5\u6a21\u578b\u53ea\u6703\u5c0d\u8a13\u7df4\u96c6\u9032\u884c\u64ec\u5408\uff0c\u53e6\u5916\u6e2c\u8a66\u96c6\u7684\u8cc7\u6599\u4e26\u672a\u53c3\u8207\u8a13\u7df4\uff0c\u56e0\u6b64\u53ef\u4ee5\u62ff\u4f86\u7576\u4f5c\u6700\u7d42\u8a55\u4f30\u6a21\u578b\u7684\u597d\u58de\u3002\u4f46\u662f\u6211\u5011\u8a13\u7df4\u7684\u6a21\u578b\u5e0c\u671b\u627e\u5230\u4e00\u500b\u4e0d\u932f\u7684\u8d85\u53c3\u6578\uff0c\u4f7f\u5f97\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u90fd\u6709\u4e0d\u932f\u7684\u6210\u7e3e\uff0c\u4e5f\u5c31\u662f\u8aaa loss 
\u8981\u8d8a\u4f4e\u8d8a\u597d\u3002\u56e0\u6b64\u6700\u5e38\u898b\u7684\u4f5c\u6cd5\u6703\u5c07\u8a13\u7df4\u8cc7\u6599\u518d\u5207\u51fa\u4e00\u500b\u9a57\u8b49\u96c6\u4f86\u627e\u51fa\u4e00\u500b\u6700\u4f73\u7684\u6a21\u578b\u53c3\u6578\uff0c\u4f7f\u5f97\u9a57\u8b49\u96c6\u7684\u8868\u73fe\u8981\u6700\u597d\u3002\u4f46\u662f\u70ba\u4e86\u907f\u514d\u6a21\u578b\u5c0d\u65bc\u6211\u5011\u6240\u5207\u7684\u9a57\u8b49\u96c6\u904e\u5ea6\u64ec\u5408\uff0c\u56e0\u6b64\u53ef\u4ee5\u900f\u904e\u4ea4\u53c9\u9a57\u8b49\u7684\u65b9\u6cd5\u5c0d\u6a21\u578b\u505a\u66f4\u597d\u7684\u8a55\u4f30\u3002\u6240\u8b02\u7684\u4ea4\u53c9\u9a57\u8b49\u7c21\u55ae\u4f86\u8aaa\u662f\u5c07\u8a13\u7df4\u8cc7\u6599\u9032\u884c\u5206\u7d44\uff0c\u4e00\u90e8\u5206\u505a\u70ba\u8a13\u7df4\u5b50\u96c6\u4f86\u8a13\u7df4\u6a21\u578b\uff0c\u53e6\u4e00\u90e8\u5206\u505a\u70ba\u9a57\u8b49\u5b50\u96c6\u4f86\u8a55\u4f30\u6a21\u578b\u3002\u7528\u8a13\u7df4\u5b50\u96c6\u7684\u6578\u64da\u5148\u8a13\u7df4\u6a21\u578b\uff0c\u7136\u5f8c\u7528\u9a57\u8b49\u5b50\u96c6\u53bb\u8dd1\u4e00\u904d\uff0c\u770b\u9a57\u8b49\u96c6\u7684\u640d\u5931\u51fd\u6578(loss)\u6216\u662f\u5206\u985e\u6e96\u78ba\u7387\u7b49\u3002\u7b49\u6a21\u578b\u8a13\u7df4\u597d\u4e4b\u5f8c\uff0c\u518d\u7528\u6e2c\u8a66\u96c6\u53bb\u6e2c\u8a66\u6a21\u578b\u7684\u6027\u80fd\u3002\u4e3b\u8981\u7684\u4ea4\u53c9\u9a57\u8b49\u6cd5\u6709\u4ee5\u4e0b\u5e7e\u500b\u65b9\u6cd5: Holdout K-fold Leave one out cross validation Random Subsampling Bootstrap","title":"\u4ec0\u9ebc\u662f\u4ea4\u53c9\u9a57\u8b49\uff1f"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb/#holdout-method","text":"\u6b64\u65b9\u6cd5\u662f\u6700\u7d93\u5178\u4e14\u6700\u7c21\u55ae\u5be6\u4f5c\u7684\u4ea4\u53c9\u9a57\u8b49\u6cd5\uff0cHoldout 
\u9867\u540d\u601d\u7fa9\u5c31\u662f\u5c07\u8cc7\u6599\u5207\u51fa\u4e00\u90e8\u5206\u4f5c\u70ba\u6a21\u578b\u8a55\u4f30\u7684\u4f9d\u64da\u3002\u5728\u9019\u7a2e\u65b9\u6cd5\u4e2d\uff0c\u6211\u5011\u5c07\u8cc7\u6599\u96a8\u6a5f\u5206\u70ba\u4e09\u90e8\u5206\uff1a\u8a13\u7df4\u96c6\u3001\u9a57\u8b49\u96c6\u548c\u6e2c\u8a66\u96c6\u3002\u5176\u4e2d\u53ea\u6709\u8a13\u7df4\u96c6\u8cc7\u6599\u5be6\u969b\u53c3\u8207\u8a13\u7df4\uff0c\u5176\u9918\u7684\u8cc7\u6599\u50c5\u62ff\u4f86\u8a55\u4f30\u6a21\u578b\u597d\u58de\u3002\u9a57\u8b49\u96c6\u4f7f\u7528\u6642\u6a5f\u662f\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u53ef\u4ee5\u6aa2\u8996\u8a13\u7df4\u7684\u8da8\u52e2\uff0c\u82e5\u6709\u767c\u73fe\u904e\u64ec\u5408\u8de1\u8c61\u53ef\u4ee5\u63d0\u65e9\u767c\u73fe\u4e26\u89e3\u6c7a\u3002\u4ee5\u53ca\u65b9\u4fbf\u6211\u5011\u9032\u884c\u8abf\u6574\u8d85\u53c3\u6578\u4ee5\u53ca\u9078\u64c7\u6700\u4f73\u7684\u6a21\u578b\u3002\u7576\u7136\u50c5\u900f\u904e\u9a57\u8b49\u96c6\u4e0d\u80fd\u4ee3\u8868\u5168\u90e8\uff0c\u56e0\u6b64\u6700\u5f8c\u78ba\u5b9a\u597d\u6a21\u578b\u6642\u3002\u6211\u5011\u6703\u518d\u62ff\u4e8b\u5148\u5207\u597d\u7684\u6e2c\u8a66\u96c6\u9032\u884c\u6700\u7d42\u7684\u8a55\u4f30\uff0c\u6aa2\u8996\u6a21\u578b\u7684\u6cdb\u5316\u80fd\u529b\u3002 \u53c3\u8003 \u512a\u9ede: 1. \u7c21\u55ae\u5be6\u4f5c\u3002 2. \u9a57\u8b49\u96c6\u53ef\u4ee5\u88ab\u62ff\u4f86\u8a55\u4f30\u6a21\u578b\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u7684\u5b78\u7fd2\u6210\u679c\u3002 3. \u6e2c\u8a66\u96c6\u53ef\u4ee5\u8a55\u4f30\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u3002 \u7f3a\u9ede: 1. \u7576\u8cc7\u6599\u96c6\u8b8a\u7570\u91cf\u8f03\u5927\u6642\uff0c\u9a57\u8b49\u96c6\u8207\u6e2c\u8a66\u96c6\u53ef\u80fd\u7121\u6cd5\u8db3\u4ee5\u8a55\u4f30\u6a21\u578b\u3002 2. 
\u4e0d\u9069\u5408\u7528\u5728\u8cc7\u6599\u4e0d\u5e73\u8861\u7684\u8cc7\u6599\u96c6\u3002","title":"Holdout Method"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb/#k-fold-cross-validation","text":"\u4e0a\u4e00\u500b\u65b9\u6cd5\u96d6\u7136\u7c21\u55ae\uff0c\u4f46\u662f\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u50c5\u5207\u4e00\u4efd\u9a57\u8b49\u96c6\u5f80\u5f80\u4e0d\u80fd\u5920\u4ee3\u8868\u5168\u90e8\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u900f\u904e\u4e00\u4e9b\u6280\u5de7\u5207\u5272\u9a57\u8b49\u96c6\uff0c\u4f7f\u5f97\u8a13\u7df4\u904e\u7a0b\u4e2d\u6709\u4e00\u500b\u66f4\u516c\u6b63\u7684\u8a55\u4f30\u65b9\u5f0f\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e K-Fold \u65b9\u6cd5\u5c07\u8a13\u7df4\u8cc7\u6599\u518d\u4f9d\u5e8f\u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\uff0cK-Fold \u88e1\u9762\u7684\u6e2c\u8a66\u96c6\u53ef\u4ee5\u7576\u6210\u9a57\u8b49\u96c6\u3002K-Fold \u7684\u65b9\u6cd5\u4e2d K \u662f\u7531\u6211\u5011\u81ea\u7531\u8abf\u63a7\u7684\uff0c\u5728\u6bcf\u6b21\u7684\u8fed\u4ee3\u4e2d\u6703\u9078\u64c7\u4e00\u7d44\u4f5c\u70ba\u9a57\u8b49\u96c6\uff0c\u5176\u9918 (k-1) \u7d44\u4f5c\u70ba\u8a13\u7df4\u96c6\u3002\u900f\u904e\u9019\u7a2e\u65b9\u5f0f\u5b78\u7fd2\uff0c\u4e0d\u540c\u5206\u7d44\u8a13\u7df4\u7684\u7d50\u679c\u9032\u884c\u5e73\u5747\u4f86\u6e1b\u5c11\u65b9\u5dee\uff0c\u56e0\u6b64\u6a21\u578b\u7684\u6027\u80fd\u5c0d\u6578\u64da\u7684\u5283\u5206\u5c31\u4e0d\u6703\u90a3\u9ebc\u654f\u611f\u3002 \u53c3\u8003 \u512a\u9ede: 1. \u964d\u4f4e\u6a21\u578b\u8a13\u7df4\u5c0d\u65bc\u8cc7\u6599\u96c6\u7684\u504f\u5dee\u3002 2. \u8a13\u7df4\u96c6\u8207\u9a57\u8b49\u96c6\u5b8c\u6574\u88ab\u5145\u5206\u5229\u7528\u8207\u5b78\u7fd2\u3002 \u7f3a\u9ede: 1. \u4e0d\u9069\u5408\u7528\u65bc\u8cc7\u6599\u4e0d\u5e73\u8861\u7684\u8cc7\u6599\u96c6\u3002 2. 
\u5982\u679c\u8981\u7c21\u55ae\u7684 K-fold \u4f86\u5c0b\u627e\u8d85\u53c3\u6578\u6703\u6709\u8cc7\u6599\u6d29\u6f0f\u554f\u984c\u5c0e\u81f4\u8a13\u7df4\u7d50\u679c\u6709\u504f\u5dee\uff0c\u56e0\u70ba\u5728\u6bcf\u500b Fold \u4e2d\u90fd\u6703\u4f7f\u7528\u540c\u4e00\u7d44\u8cc7\u6599\u9032\u884c\u9a57\u8b49\u3002 3. \u5728\u76f8\u540c\u7684\u9a57\u8b49\u96c6\u8a08\u7b97\u6a21\u578b\u7684\u8aa4\u5dee\uff0c\u7576\u627e\u5230\u4e86\u6700\u4f73\u7684\u8d85\u53c3\u6578\u3002\u9019\u53ef\u80fd\u6703\u5c0e\u81f4\u91cd\u5927\u504f\u5dee\uff0c\u6709\u904e\u64ec\u5408\u7591\u616e\u3002","title":"K-fold Cross-Validation"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb/#leave-one-out","text":"\u6b64\u65b9\u6cd5\u662f K-fold \u5176\u4e2d\u4e00\u7a2e\u7279\u4f8b\uff0c\u7576 K \u7b49\u65bc\u8cc7\u6599\u96c6\u7684\u6578\u91cf\u6642\u5c31\u7b49\u65bc Leave One Out \u65b9\u6cd5\u3002\u4e5f\u5c31\u662f\u5728\u6bcf\u6b21\u8a13\u7df4\u6642\u50c5\u6703\u628a\u4e00\u7b46\u8cc7\u6599\u7576\u6210\u6e2c\u8a66\u8cc7\u6599\uff0c\u5176\u9918\u7684 N-1 \u7b46\u8cc7\u6599\u4f5c\u70ba\u8a13\u7df4\u6a21\u578b\u7684\u8cc7\u6599\u3002\u6b64\u4f5c\u6cd5\u76f8\u7576\u7c21\u55ae\u660e\u77ad\uff0c\u4f46\u662f\u8a13\u7df4\u8ca0\u64d4\u6703\u975e\u5e38\u91cd\u4e14\u8017\u6642\u3002\u7136\u800c Leave p-out \u662f\u53e6\u4e00\u7a2e\u6280\u5de7\uff0c\u5176\u4e2d\u7684 p \u4f7f\u7528\u8005\u53ef\u4ee5\u81ea\u5df1\u8a2d\u5b9a\u6bcf\u6b21\u8a13\u7df4\u9700\u8981\u7559\u5e7e\u7b46\u8cc7\u6599\u4f5c\u70ba\u6e2c\u8a66\u96c6\u3002 \u512a\u9ede: 1. \u7c21\u55ae\u4e14\u5bb9\u6613\u7406\u89e3\uff0c\u597d\u5be6\u4f5c\u3002 \u7f3a\u9ede: 1. 
\u9700\u8981\u82b1\u8cbb\u66f4\u591a\u7684\u8a13\u7df4\u6642\u9593\u3002","title":"Leave One Out"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb/#random-subsampling","text":"","title":"Random Subsampling"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb/#bootstrapping","text":"\u9084\u6709\u4e00\u7a2e\u6bd4\u8f03\u7279\u6b8a\u7684\u4ea4\u53c9\u9a57\u8b49\u65b9\u5f0f\uff0cBootstrapping \u81ea\u52a9\u62bd\u6a23\u6cd5\u3002\u662f\u4e00\u7a2e\u5f9e\u7d66\u5b9a\u8a13\u7df4\u96c6\u4e2d\u6709\u653e\u56de\u7684\u5747\u52fb\u62bd\u6a23\uff0c\u4e5f\u5c31\u662f\u8aaa\uff0c\u6bcf\u7576\u9078\u4e2d\u4e00\u500b\u6a23\u672c\uff0c\u5b83\u7b49\u53ef\u80fd\u5730\u88ab\u518d\u6b21\u9078\u4e2d\u4e26\u88ab\u518d\u6b21\u6dfb\u52a0\u5230\u8a13\u7df4\u96c6\u4e2d\u3002\u5047\u8a2d\u6bcf\u6b21\u8a13\u7df4\u90fd\u63a1\u6a23\u5341\u500b\u6a23\u672c\uff0c\u5728\u9019\u5341\u7b46\u8cc7\u6599\u4e2d\u5f88\u6709\u53ef\u80fd\u6703\u518d\u6b21\u88ab\u96a8\u6a5f\u62bd\u5230\u3002\u5269\u4e0b\u6c92\u6709\u62bd\u5230\u7684\u8cc7\u6599\u5247\u90fd\u8b8a\u6210\u6e2c\u8a66\u96c6\uff0c\u7528\u4f86\u8a55\u4f30\u8a13\u7df4\u5b8c\u7684\u6a21\u578b\u3002","title":"Bootstrapping"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation 
\u7c21\u4ecb/#_4","text":"\u4ea4\u53c9\u9a57\u8b49\u662f\u8a13\u7df4\u6a21\u578b\u4e2d\u975e\u5e38\u91cd\u8981\u7684\u6280\u5de7\uff0c\u5c24\u5176\u662f\u7576\u624b\u908a\u7684\u8cc7\u6599\u96c6\u6709\u9650\u6642\u66f4\u61c9\u8a72\u4f7f\u7528\u3002\u900f\u904e\u4ea4\u53c9\u9a57\u8b49\u6280\u5de7\uff0c\u5373\u4f7f\u5728\u6578\u64da\u6709\u9650\u7684\u60c5\u6cc1\u4e0b\uff0c\u6211\u5011\u4e5f\u80fd\u5920\u7372\u5f97\u6e96\u78ba\u7684\u7d50\u679c\uff0c\u4e26\u4e14\u53ef\u4ee5\u907f\u514d\u6a21\u578b\u904e\u5ea6\u64ec\u5408\u3002\u4e26\u70ba\u6211\u5011\u63d0\u4f9b\u66f4\u6e96\u78ba\u7684\u6a21\u578b\u9810\u6e2c\u6027\u80fd\u4f30\u8a08\u65b9\u5f0f\uff0c\u540c\u6642\u4e5f\u80fd\u5920\u63d0\u5347\u6a21\u578b\u7684\u6cdb\u5316\u80fd\u529b\u3002\u4ee5\u4e0a\u7684\u65b9\u6cd5\u53ef\u4ee5\u76f4\u63a5\u4f7f\u7528 scikit-learn \u88e1\u9762 model_selection \u5e95\u4e0b\u7684 cross_val_score \u65b9\u6cd5\u9032\u884c\u5be6\u4f5c\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u5c0f\u7d50"},{"location":"26.\u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation/","text":"[Day 26] \u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u4e86\u89e3 K-Fold \u5404\u7a2e\u4e0d\u540c\u8b8a\u5f62 K-Fold Cross-Validation Nested K-Fold Cross Validation Repeated K-Fold Stratified K-Fold Group K-Fold \u524d\u8a00 \u4ea4\u53c9\u9a57\u8b49\u53c8\u7a31\u70ba\u6a23\u672c\u5916\u6e2c\u8a66\uff0c\u662f\u8cc7\u6599\u79d1\u5b78\u4e2d\u91cd\u8981\u7684\u4e00\u74b0\u3002\u900f\u904e\u8cc7\u6599\u9593\u7684\u91cd\u8907\u63a1\u6a23\u904e\u7a0b\uff0c\u7528\u65bc\u8a55\u4f30\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u4e26\u9a57\u8b49\u6a21\u578b\u5c0d\u7368\u7acb\u6e2c\u8a66\u6578\u64da\u96c6\u7684\u6cdb\u5316\u80fd\u529b\u3002\u5728\u4eca\u5929\u7684\u6587\u7ae0\u4e2d\u6211\u5011\u5c07\u8a73\u7d30\u7684\u4f86\u4ecb\u7d39\u6bcf\u4e00\u7a2e K-Fold \u8b8a\u578b\u3002 K-Fold 
Cross-Validation \u5728 K-Fold \u7684\u65b9\u6cd5\u4e2d\u6211\u5011\u6703\u5c07\u8cc7\u6599\u5207\u5206\u70ba K \u7b49\u4efd\uff0cK \u662f\u7531\u6211\u5011\u81ea\u7531\u8abf\u63a7\u7684\uff0c\u4ee5\u4e0b\u5716\u70ba\u4f8b\uff1a\u5047\u8a2d\u6211\u5011\u8a2d\u5b9a K=10\uff0c\u4e5f\u5c31\u662f\u5c07\u8a13\u7df4\u96c6\u5207\u5272\u70ba\u5341\u7b49\u4efd\u3002\u9019\u610f\u5473\u8457\u76f8\u540c\u7684\u6a21\u578b\u8981\u8a13\u7df4\u5341\u6b21\uff0c\u6bcf\u4e00\u6b21\u7684\u8a13\u7df4\u90fd\u6703\u5f9e\u9019\u5341\u7b49\u4efd\u6311\u9078\u5176\u4e2d\u4e5d\u7b49\u4efd\u4f5c\u70ba\u8a13\u7df4\u8cc7\u6599\uff0c\u5269\u4e0b\u4e00\u7b49\u4efd\u672a\u53c3\u8207\u8a13\u7df4\u4e26\u4f5c\u70ba\u9a57\u8b49\u96c6\u3002\u56e0\u6b64\u8a13\u7df4\u5341\u56de\u5c07\u6703\u6709\u5341\u500b\u4e0d\u540c\u9a57\u8b49\u96c6\u7684 Error\uff0c\u9019\u500b Error \u901a\u5e38\u6211\u5011\u6703\u7a31\u4f5c loss \u4e5f\u5c31\u662f\u6a21\u578b\u8a55\u4f30\u65b9\u5f0f\u3002\u6a21\u578b\u8a55\u4f30\u65b9\u5f0f\u6709\u5f88\u591a\u7a2e\uff0c\u4ee5\u56de\u6b78\u554f\u984c\u4f86\u8aaa\u5c31\u6709 MSE\u3001MAE\u3001RMSE...\u7b49\u3002\u6700\u7d42\u628a\u9019\u5341\u6b21\u7684 loss \u52a0\u7e3d\u8d77\u4f86\u53d6\u5e73\u5747\u5c31\u53ef\u4ee5\u7576\u6210\u6700\u7d42\u7d50\u679c\u3002\u900f\u904e\u9019\u7a2e\u65b9\u5f0f\uff0c\u4e0d\u540c\u5206\u7d44\u8a13\u7df4\u7684\u7d50\u679c\u9032\u884c\u5e73\u5747\u4f86\u6e1b\u5c11\u65b9\u5dee\uff0c\u56e0\u6b64\u6a21\u578b\u7684\u6027\u80fd\u5c0d\u6578\u64da\u7684\u5283\u5206\u5c31\u4e0d\u6703\u90a3\u9ebc\u654f\u611f\u3002 \u53c3\u8003 [scikit-learn] K-Fold Nested K-Fold Cross Validation \u6b64\u65b9\u6cd5\u70ba K-Fold \u7684\u8b8a\u578b\uff0cNested \u610f\u6307\u96d9\u8ff4\u5708(\u5de2\u72c0)\u7684\u610f\u601d\u3002\u5206\u5225\u6709\u5916\u5c64\u8ff4\u5708(Outer Loop)\u70ba\u4e00\u822c\u6b63\u5e38\u7684 K-Fold\u3002\u552f\u4e00\u4e0d\u540c\u7684\u662f\u6211\u5011\u5728\u6bcf\u4e00\u6b21\u8fed\u4ee3\u4e2d\u6703\u5c07\u5916\u5c64 K-Fold 
\u7684\u8a13\u7df4\u96c6\u62ff\u51fa\u4f86\u518d\u9032\u5165\u5230\u5167\u5c64\u8ff4\u5708(Inner Loop)\u518d\u505a\u4e00\u6b21 K-Fold\u3002\u7531\u4e0b\u5716\u53ef\u4ee5\u770b\u5230\uff0c(1)\u6211\u5011\u53ef\u4ee5\u5728\u7b2c\u4e00\u500b\u5916\u5c64\u8ff4\u5708\u4e2d\u5c07\u8a13\u7df4\u8cc7\u6599\u53c8\u5207\u70ba\u4e94\u4efd\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\uff0c\u5167\u5c64\u5708\u900f\u904e Grid Search \u7b49\u6f14\u7b97\u6cd5\u4f86\u5c0b\u627e\u6700\u4f73\u8d85\u53c3\u6578\u3002\u7b49\u627e\u5230\u6700\u597d\u7684\u6a21\u578b\u8d85\u53c3\u6578\u5f8c\uff0c\u6211\u5011\u518d\u62ff(2)\u5916\u5c64\u8ff4\u5708\u7684\u6e2c\u8a66\u8cc7\u6599\u9032\u884c\u6a21\u578b\u8a55\u4f30\u4e26\u8a08\u7b97 loss\u3002\u6700\u7d42\u6211\u5011\u6703\u5f97\u5230\u4e94\u500b\u6e2c\u8a66\u96c6 loss \u7684\u5e73\u5747\u4f5c\u70ba\u4ea4\u53c9\u9a57\u8b49\u6a21\u578b\u8a55\u4f30\u7d50\u679c\u3002 [scikit-learn] Nested K-Fold Repeated K-Fold \u53e6\u4e00\u500b K-Fold \u8b8a\u578b\u70ba Repeated K-Fold \u9867\u540d\u601d\u7fa9\u5c31\u662f\u91cd\u8907 n \u6b21 K-Fold cross-validation\u3002\u5047\u8a2d K=2\u3001n=2 \u4ee3\u8868 2-fold cross validation\uff0c\u5728\u6bcf\u4e00\u56de\u5408\u53c8\u6703\u5c07\u8cc7\u6599\u6253\u4e82\u5f97\u5230\u65b0\u7d44\u5408\u3002\u56e0\u6b64\u6700\u7d42\u6703\u5f97\u5230 4 \u7d44\u7684\u8cc7\u6599\uff0c\u610f\u5473\u8457\u6a21\u578b\u5c07\u8a13\u7df4\u56db\u904d\u3002\u6b64\u7a2e\u65b9\u6cd5\u6703\u78ba\u4fdd\u6bcf\u6b21\u7d44\u5408\u7684\u96a8\u6a5f\u8cc7\u6599\u4e26\u4e0d\u6703\u91cd\u8907\u3002\u7c21\u55ae\u4f86\u8aaa\u57f7\u884c K-Fold \u4ea4\u53c9\u9a57\u8b49\uff0c\u7136\u5f8c\u91cd\u65b0\u6d17\u724c\u6578\u64da\uff0c\u7136\u5f8c\u518d\u6b21\u57f7\u884c K-Fold\u3002 [scikit-learn] RepeatedKFold Stratified K-Fold \u5206\u5c64\u4ea4\u53c9\u9a57\u8b49\uff0c\u6bcf\u500b Fold 
\u90fd\u662f\u6309\u7167\u985e\u5225\u7684\u6bd4\u4f8b\u62bd\u51fa\u4f86\u7684\u3002\u5047\u8a2d\u9019\u500b\u5206\u985e\u4efb\u52d9\u4e00\u5171\u6709\u4e09\u500b\u985e\u5225A\u3001B\u3001C\uff0c\u5b83\u5011\u7684\u6bd4\u4f8b\u662f1:4:8\u3002\u90a3\u9ebc\u6bcf\u500bfold\u4e2d\u7684A\u3001B\u3001C\u7684\u6bd4\u4f8b\u4e5f\u5fc5\u9808\u662f1:4:8\u3002\u5176\u5be6\u73fe\u65b9\u5f0f\u4e5f\u975e\u5e38\u7c21\u55ae\uff0c\u9996\u5148\u4f9d\u5e8f\u628aA\u3001B\u3001C\u985e\u5225\u7684\u6578\u64da\u96a8\u6a5f\u5206\u6210k\u7d44\uff0c\u6700\u5f8c\u518d\u628a\u5b83\u5011\u4f9d\u7167\u6bd4\u4f8b\u5408\u4f75\u8d77\u4f86\uff0c\u5c31\u5f97\u5230\u4e86k\u7d44\u6eff\u8db31:4:8\u7684\u6578\u64da\u4e86\u3002 \u512a\u9ede: 1. \u512a\u65bc\u4e00\u822c\u7684 K-Fold \u56e0\u70batest set\u80fd\u5145\u5206\u4ee3\u8868\u6574\u9ad4\u6578\u64da\u3002 2. \u9810\u6e2c\u7d50\u679c\u7684\u65b9\u5dee\u4e5f\u6703\u8b8a\u5c0f\uff0c\u4f7f\u5f97\u4ea4\u53c9\u9a57\u8b49\u7684 error \u66f4\u53ef\u9760\u3002 3. \u5c0d\u65bc\u8cc7\u6599\u4e0d\u5e73\u8861\u7684\u6578\u64da\u5f88\u6709\u7528 \u7f3a\u9ede: 1. 
\u5927\u591a\u5be6\u4f8b\u90fd\u4ee5\u5206\u985e\u554f\u984c\u70ba\u4e3b [scikit-learn] StratifiedKFold [scikit-learn] StratifiedShuffleSplit Group K-Fold \u6b64\u505a\u6cd5\u70ba\u4e86\u907f\u514d\u53d6\u9023\u7e8c\u7684\u8cc7\u6599\u800c\u9020\u6210\u6e2c\u8a66\u96c6\u6216\u9a57\u8b49\u96c6\u504f\u5411\u67d0\u4e00\u7279\u5225\u7684\u72c0\u6cc1\u800c\u9020\u6210\u904e\u5ea6\u64ec\u5408\u8a13\u7df4\u96c6\uff0c\u53cd\u800c\u5728\u672a\u770b\u904e\u7684\u8cc7\u6599\u4e0b\u8868\u73fe\u4e0d\u597d\u3002Group K-Fold \u70ba\u4e86\u907f\u514d\u6b64\u60c5\u6cc1\u767c\u751f\uff0c\u5b83\u5207\u5272\u8cc7\u6599\u6642\u6709\u6548\u7684\u5f9e\u8cc7\u6599\u96c6\u4e2d\u6bcf\u500b\u5340\u584a\u96a8\u6a5f\u6311\u9078\u4f5c\u70ba\u9a57\u8b49\u96c6\u3002\u540c\u6642\u4fdd\u8b49\u6bcf\u4e00\u500b Fold \u7684\u9a57\u8b49\u96c6\u4e26\u4e0d\u6703\u6709\u91cd\u8907\u7684\u8cc7\u6599\u3002\u5047\u8a2d\u4f60\u6709\u4e09\u500b\u985e\u5225\uff0c\u81f3\u5c11\u9a57\u8b49\u96c6\u5fc5\u9808\u5f9e\u4e09\u500b\u4e0d\u540c\u7684\u5206\u7d44\u4e2d\u62bd\u6a23\u53d6\u51fa\uff0c\u540c\u6642\u78ba\u4fdd\u6bcf\u4e00\u500b Fold \u6240\u62bd\u51fa\u4f86\u7684\u9019\u4e09\u500b\u5206\u7d44\u4e26\u4e0d\u6703\u91cd\u8907\u3002 [scikit-learn] GroupKFold \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 26] \u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation"},{"location":"26.\u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation/#day-26-k-fold-cross-validation","text":"","title":"[Day 26] \u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation"},{"location":"26.\u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation/#_1","text":"\u4e86\u89e3 K-Fold \u5404\u7a2e\u4e0d\u540c\u8b8a\u5f62 K-Fold Cross-Validation Nested K-Fold Cross Validation Repeated K-Fold Stratified K-Fold Group K-Fold","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"26.\u4ea4\u53c9\u9a57\u8b49 K-Fold 
Cross-Validation/#_2","text":"\u4ea4\u53c9\u9a57\u8b49\u53c8\u7a31\u70ba\u6a23\u672c\u5916\u6e2c\u8a66\uff0c\u662f\u8cc7\u6599\u79d1\u5b78\u4e2d\u91cd\u8981\u7684\u4e00\u74b0\u3002\u900f\u904e\u8cc7\u6599\u9593\u7684\u91cd\u8907\u63a1\u6a23\u904e\u7a0b\uff0c\u7528\u65bc\u8a55\u4f30\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u4e26\u9a57\u8b49\u6a21\u578b\u5c0d\u7368\u7acb\u6e2c\u8a66\u6578\u64da\u96c6\u7684\u6cdb\u5316\u80fd\u529b\u3002\u5728\u4eca\u5929\u7684\u6587\u7ae0\u4e2d\u6211\u5011\u5c07\u8a73\u7d30\u7684\u4f86\u4ecb\u7d39\u6bcf\u4e00\u7a2e K-Fold \u8b8a\u578b\u3002","title":"\u524d\u8a00"},{"location":"26.\u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation/#k-fold-cross-validation","text":"\u5728 K-Fold \u7684\u65b9\u6cd5\u4e2d\u6211\u5011\u6703\u5c07\u8cc7\u6599\u5207\u5206\u70ba K \u7b49\u4efd\uff0cK \u662f\u7531\u6211\u5011\u81ea\u7531\u8abf\u63a7\u7684\uff0c\u4ee5\u4e0b\u5716\u70ba\u4f8b\uff1a\u5047\u8a2d\u6211\u5011\u8a2d\u5b9a K=10\uff0c\u4e5f\u5c31\u662f\u5c07\u8a13\u7df4\u96c6\u5207\u5272\u70ba\u5341\u7b49\u4efd\u3002\u9019\u610f\u5473\u8457\u76f8\u540c\u7684\u6a21\u578b\u8981\u8a13\u7df4\u5341\u6b21\uff0c\u6bcf\u4e00\u6b21\u7684\u8a13\u7df4\u90fd\u6703\u5f9e\u9019\u5341\u7b49\u4efd\u6311\u9078\u5176\u4e2d\u4e5d\u7b49\u4efd\u4f5c\u70ba\u8a13\u7df4\u8cc7\u6599\uff0c\u5269\u4e0b\u4e00\u7b49\u4efd\u672a\u53c3\u8207\u8a13\u7df4\u4e26\u4f5c\u70ba\u9a57\u8b49\u96c6\u3002\u56e0\u6b64\u8a13\u7df4\u5341\u56de\u5c07\u6703\u6709\u5341\u500b\u4e0d\u540c\u9a57\u8b49\u96c6\u7684 Error\uff0c\u9019\u500b Error \u901a\u5e38\u6211\u5011\u6703\u7a31\u4f5c loss \u4e5f\u5c31\u662f\u6a21\u578b\u8a55\u4f30\u65b9\u5f0f\u3002\u6a21\u578b\u8a55\u4f30\u65b9\u5f0f\u6709\u5f88\u591a\u7a2e\uff0c\u4ee5\u56de\u6b78\u554f\u984c\u4f86\u8aaa\u5c31\u6709 MSE\u3001MAE\u3001RMSE...\u7b49\u3002\u6700\u7d42\u628a\u9019\u5341\u6b21\u7684 loss 
\u52a0\u7e3d\u8d77\u4f86\u53d6\u5e73\u5747\u5c31\u53ef\u4ee5\u7576\u6210\u6700\u7d42\u7d50\u679c\u3002\u900f\u904e\u9019\u7a2e\u65b9\u5f0f\uff0c\u4e0d\u540c\u5206\u7d44\u8a13\u7df4\u7684\u7d50\u679c\u9032\u884c\u5e73\u5747\u4f86\u6e1b\u5c11\u65b9\u5dee\uff0c\u56e0\u6b64\u6a21\u578b\u7684\u6027\u80fd\u5c0d\u6578\u64da\u7684\u5283\u5206\u5c31\u4e0d\u6703\u90a3\u9ebc\u654f\u611f\u3002 \u53c3\u8003 [scikit-learn] K-Fold","title":"K-Fold Cross-Validation"},{"location":"26.\u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation/#nested-k-fold-cross-validation","text":"\u6b64\u65b9\u6cd5\u70ba K-Fold \u7684\u8b8a\u578b\uff0cNested \u610f\u6307\u96d9\u8ff4\u5708(\u5de2\u72c0)\u7684\u610f\u601d\u3002\u5206\u5225\u6709\u5916\u5c64\u8ff4\u5708(Outer Loop)\u70ba\u4e00\u822c\u6b63\u5e38\u7684 K-Fold\u3002\u552f\u4e00\u4e0d\u540c\u7684\u662f\u6211\u5011\u5728\u6bcf\u4e00\u6b21\u8fed\u4ee3\u4e2d\u6703\u5c07\u5916\u5c64 K-Fold \u7684\u8a13\u7df4\u96c6\u62ff\u51fa\u4f86\u518d\u9032\u5165\u5230\u5167\u5c64\u8ff4\u5708(Inner Loop)\u518d\u505a\u4e00\u6b21 K-Fold\u3002\u7531\u4e0b\u5716\u53ef\u4ee5\u770b\u5230\uff0c(1)\u6211\u5011\u53ef\u4ee5\u5728\u7b2c\u4e00\u500b\u5916\u5c64\u56de\u5708\u4e2d\u5c07\u8a13\u7df4\u8cc7\u6599\u53c8\u5207\u70ba\u4e94\u4efd\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\uff0c\u5167\u5c64\u5708\u900f\u904e Grid Search \u7b49\u6f14\u7b97\u6cd5\u4f86\u5c0b\u627e\u6700\u4f73\u8d85\u53c3\u6578\u3002\u7b49\u627e\u5230\u6700\u597d\u7684\u6a21\u578b\u8d85\u53c3\u6578\u5f8c\uff0c\u6211\u5011\u518d\u62ff(2)\u5916\u5c64\u56de\u5708\u7684\u6e2c\u8a66\u8cc7\u6599\u9032\u884c\u6a21\u578b\u8a55\u4f30\u4e26\u8a08\u7b97 loss\u3002\u6700\u7d42\u6211\u5011\u6703\u5f97\u5230\u4e94\u500b\u6e2c\u8a66\u96c6 loss \u7684\u5e73\u5747\u4f5c\u70ba\u4ea4\u53c9\u9a57\u8b49\u6a21\u578b\u8a55\u4f30\u7d50\u679c\u3002 [scikit-learn] Nested K-Fold","title":"Nested K-Fold Cross Validation"},{"location":"26.\u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation/#repeated-k-fold","text":"\u53e6\u4e00\u500b 
K-Fold \u8b8a\u578b\u70ba Repeated K-Fold \u9867\u540d\u601d\u7fa9\u5c31\u662f\u91cd\u8907 n \u6b21 K-Fold cross-validation\u3002\u5047\u8a2d K=2\u3001n=2 \u4ee3\u8868 2-fold cross validation\uff0c\u5728\u6bcf\u4e00\u56de\u5408\u53c8\u6703\u5c07\u8cc7\u6599\u6253\u4e82\u5f97\u5230\u65b0\u7d44\u5408\u3002\u56e0\u6b64\u6700\u7d42\u6703\u5f97\u5230 4 \u7d44\u7684\u8cc7\u6599\uff0c\u610f\u5473\u8457\u6a21\u578b\u5c07\u8a13\u7df4\u56db\u904d\u3002\u6b64\u7a2e\u65b9\u6cd5\u6703\u78ba\u4fdd\u6bcf\u6b21\u7d44\u5408\u7684\u96a8\u6a5f\u8cc7\u6599\u4e26\u4e0d\u6703\u91cd\u8907\u3002\u7c21\u55ae\u4f86\u8aaa\u57f7\u884c K-Fold \u4ea4\u53c9\u9a57\u8b49\uff0c\u7136\u5f8c\u91cd\u65b0\u6d17\u724c\u6578\u64da\uff0c\u7136\u5f8c\u518d\u6b21\u57f7\u884c K-Fold\u3002 [scikit-learn] RepeatedKFold","title":"Repeated K-Fold"},{"location":"26.\u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation/#stratified-k-fold","text":"\u5206\u5c64\u4ea4\u53c9\u9a57\u8b49\uff0c\u6bcf\u500b Fold \u90fd\u662f\u6309\u7167\u985e\u5225\u7684\u6bd4\u4f8b\u62bd\u51fa\u4f86\u7684\u3002\u5047\u8a2d\u9019\u500b\u5206\u985e\u4efb\u52d9\u4e00\u5171\u6709\u4e09\u500b\u985e\u5225A\u3001B\u3001C\uff0c\u5b83\u5011\u7684\u6bd4\u4f8b\u662f1:4:8\u3002\u90a3\u9ebc\u6bcf\u500bfold\u4e2d\u7684A\u3001B\u3001C\u7684\u6bd4\u4f8b\u4e5f\u5fc5\u9808\u662f1:4:8\u3002\u5176\u5be6\u73fe\u65b9\u5f0f\u4e5f\u975e\u5e38\u7c21\u55ae\uff0c\u9996\u5148\u4f9d\u5e8f\u628aA\u3001B\u3001C\u985e\u5225\u7684\u6578\u64da\u96a8\u6a5f\u5206\u6210k\u7d44\uff0c\u6700\u5f8c\u518d\u628a\u5b83\u5011\u4f9d\u7167\u6bd4\u4f8b\u5408\u4f75\u8d77\u4f86\uff0c\u5c31\u5f97\u5230\u4e86k\u7d44\u6eff\u8db31:4:8\u7684\u6578\u64da\u4e86\u3002 \u512a\u9ede: 1. \u512a\u65bc\u4e00\u822c\u7684 K-Fold \u56e0\u70batest set\u80fd\u5145\u5206\u4ee3\u8868\u6574\u9ad4\u6578\u64da\u3002 2. \u9810\u6e2c\u7d50\u679c\u7684\u65b9\u5dee\u4e5f\u6703\u8b8a\u5c0f\uff0c\u4f7f\u5f97\u4ea4\u53c9\u9a57\u8b49\u7684 error \u66f4\u53ef\u9760\u3002 3. 
\u5c0d\u65bc\u8cc7\u6599\u4e0d\u5e73\u8861\u7684\u6578\u64da\u5f88\u6709\u7528 \u7f3a\u9ede: 1. \u5927\u591a\u5be6\u4f8b\u90fd\u4ee5\u5206\u985e\u554f\u984c\u70ba\u4e3b [scikit-learn] StratifiedKFold [scikit-learn] StratifiedShuffleSplit","title":"Stratified K-Fold"},{"location":"26.\u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation/#group-k-fold","text":"\u6b64\u505a\u6cd5\u70ba\u4e86\u907f\u514d\u53d6\u9023\u7e8c\u7684\u8cc7\u6599\u800c\u9020\u6210\u6e2c\u8a66\u96c6\u6216\u9a57\u8b49\u96c6\u504f\u5411\u67d0\u4e00\u7279\u5225\u7684\u72c0\u6cc1\u800c\u9020\u6210\u904e\u5ea6\u64ec\u5408\u8a13\u7df4\u96c6\uff0c\u53cd\u800c\u5728\u672a\u770b\u904e\u7684\u8cc7\u6599\u4e0b\u8868\u73fe\u4e0d\u597d\u3002Group K-Fold \u70ba\u4e86\u907f\u514d\u6b64\u60c5\u6cc1\u767c\u751f\uff0c\u5b83\u5207\u5272\u8cc7\u6599\u6642\u6709\u6548\u7684\u5f9e\u8cc7\u6599\u96c6\u4e2d\u6bcf\u500b\u5340\u584a\u96a8\u6a5f\u6311\u9078\u4f5c\u70ba\u9a57\u8b49\u96c6\u3002\u540c\u6642\u4fdd\u8b49\u6bcf\u4e00\u500b Fold \u7684\u9a57\u8b49\u96c6\u4e26\u4e0d\u6703\u6709\u91cd\u8907\u7684\u8cc7\u6599\u3002\u5047\u8a2d\u4f60\u6709\u4e09\u500b\u985e\u5225\uff0c\u81f3\u5c11\u9a57\u8b49\u96c6\u5fc5\u9808\u5f9e\u4e09\u500b\u4e0d\u540c\u7684\u5206\u7d44\u4e2d\u62bd\u6a23\u53d6\u51fa\uff0c\u540c\u6642\u78ba\u4fdd\u6bcf\u4e00\u500b Fold \u6240\u62bd\u51fa\u4f86\u7684\u9019\u4e09\u500b\u5206\u7d44\u4e26\u4e0d\u6703\u91cd\u8907\u3002 [scikit-learn] GroupKFold \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Group K-Fold"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/","text":"[Day 27] \u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u63a2\u8a0e\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u7684\u5341\u500b\u932f\u8aa4 \u524d\u8a00 
\u4eba\u5de5\u667a\u6167\u8fd1\u5e74\u4f86\u6210\u70ba\u4efb\u4f55\u7522\u696d\u71b1\u9580\u7684\u8a71\u984c\u4e4b\u4e00\uff0c\u5404\u516c\u53f8\u7a4d\u6975\u5730\u5c0e\u5165\u6a5f\u5668\u5b78\u7fd2\u6280\u8853\u5354\u52a9\u7522\u696d AI \u5316\u3002\u4f8b\u5982\uff1a\u667a\u6167\u91ab\u7642\u3001\u667a\u6167\u4ea4\u901a\u3001\u667a\u6167\u88fd\u9020......\u7b49\u3002\u6b63\u662f\u56e0\u70ba AI \u6280\u8853\u7684\u5275\u65b0\u8207\u666e\u53ca\uff0c\u8a13\u7df4\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u518d\u4e5f\u4e0d\u662f\u7406\u5de5\u80cc\u666f\u7684\u4eba\u624d\u80fd\u505a\u7684\u4e8b\u3002\u6b64\u5916\u96a8\u8457 Python \u958b\u767c\u793e\u7fa4\u8301\u58ef\uff0c\u8a31\u591a\u958b\u6e90\u7684 AI \u5957\u4ef6\u5982\u96e8\u5f8c\u6625\u7b4d\u822c\u7684\u51fa\u73fe\u5927\u5927\u964d\u4f4e\u4e86\u6a5f\u5668\u5b78\u7fd2\u5efa\u6a21\u7684\u9580\u6abb\u3002\u5728\u4eca\u5929\u7684\u5167\u5bb9\u4e2d\u6211\u60f3\u85c9\u7531\u9435\u4eba\u8cfd\u4f86\u8ddf\u5927\u5bb6\u5206\u4eab\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b\uff0c\u4e26\u4e14\u5f9e\u8cc7\u6599\u9762\u8207\u6a21\u578b\u9762\u7684\u89d2\u5ea6\u4f86\u63a2\u8a0e\u6a5f\u5668\u5b78\u7fd2\u61c9\u8a72\u6ce8\u610f\u7684\u5e7e\u4ef6\u4e8b\u3002\u5c24\u5176\u662f\u5728\u521d\u5b78\u968e\u6bb5\uff0c\u56e0\u7f3a\u4e4f\u7d93\u9a57\u5f80\u5f80\u6703\u72af\u4e00\u4e9b\u7121\u53ef\u907f\u514d\u7684\u932f\u8aa4\u3002\u6240\u4ee5\u9019\u7bc7\u6587\u7ae0\u5c07\u9ede\u51fa\u5341\u500b\u6a5f\u5668\u5b78\u7fd2\u4e2d\u5e38\u72af\u7684\u96b1\u5f62\u932f\u8aa4\u3002 \u8cc7\u6599\u9762 \u8cc7\u6599\u6536\u96c6\u8207\u8655\u7406\u4e0d\u7576 \u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u985e\u5225\u5206\u4f48\u4e0d\u4e00\u81f4 \u6c92\u6709\u8cc7\u6599\u8996\u89ba\u5316\u7684\u7fd2\u6163 \u4f7f\u7528 LabelEncoder \u70ba\u7279\u5fb5\u7de8\u78bc \u8cc7\u6599\u8655\u7406\u4e0d\u7576\u5c0e\u81f4\u8cc7\u6599\u6d29\u6f0f \u6a21\u578b\u9762 \u50c5\u4f7f\u7528\u6e2c\u8a66\u96c6\u8a55\u4f30\u6a21\u578b\u597d\u58de 
\u5728\u6c92\u6709\u4ea4\u53c9\u9a57\u8b49\u7684\u60c5\u6cc1\u4e0b\u5224\u65b7\u6a21\u578b\u6027\u80fd \u5206\u985e\u554f\u984c\u50c5\u4f7f\u7528\u6e96\u78ba\u7387\u4f5c\u70ba\u8861\u91cf\u6a21\u578b\u7684\u6307\u6a19 \u8ff4\u6b78\u554f\u984c\u50c5\u4f7f\u7528 R2 \u5206\u6578\u8a55\u4f30\u6a21\u578b\u597d\u58de \u4efb\u4f55\u4e8b\u60c5\u5225\u6025\u8457\u60f3\u7528 AI \u89e3\u6c7a 1. \u8cc7\u6599\u6536\u96c6\u8207\u8655\u7406\u4e0d\u7576 \u6a5f\u5668\u5b78\u7fd2\u9996\u8981\u7684\u6b65\u9a5f\u662f\u5b9a\u7fa9\u554f\u984c\uff0c\u7576\u78ba\u5b9a\u76ee\u6a19\u8207\u65b9\u5411\u5f8c\u5373\u53ef\u958b\u59cb\u641c\u96c6\u8cc7\u6599\u3002\u76f8\u4fe1\u5927\u5bb6\u90fd\u77e5\u9053\u73fe\u5be6\u751f\u6d3b\u4e2d\u7684\u8cc7\u6599\u5f97\u4f86\u4e0d\u6613\uff0c\u5373\u4f7f\u5f9e\u8cc7\u6599\u5eab\u53d6\u5f97\u4e86\u9019\u4e9b\u8cc7\u6599\u5f8c\u6211\u5011\u9084\u9700\u8981\u82b1\u5927\u91cf\u7684\u6642\u9593\u9032\u884c\u8cc7\u6599\u6e05\u6d17\u3002\u6240\u8b02\u7684\u8cc7\u6599\u6e05\u6d17\u662f\u8cc7\u6599\u5eab\u7576\u4e2d\u53ef\u80fd\u6703\u6709\u7f3a\u5931\u503c\uff0c\u4f8b\u5982\uff1aNA\u3001Inf\u3001NaN\u3001NULL\u3002 NA\uff1a\u8868\u793a\u7f3a\u5931\u503c\uff0c\u662f Not Available \u7684\u7e2e\u5beb\u3002 Inf\uff1a\u8868\u793a\u7121\u7aae\u5927\uff0c\u662f Infinite \u7684\u7e2e\u5beb\u3002 NaN\uff1a\u8868\u793a\u975e\u6578\u503c\uff0c\u662f Not a Number \u7684\u7e2e\u5beb\u3002 NULL\uff1a\u8868\u793a\u7a7a\u503c\uff0c\u5373\u6c92\u6709\u5167\u5bb9\u3002 
\u7576\u8cc7\u6599\u90fd\u5b8c\u6210\u4e86\u524d\u8655\u7406\u5f8c\uff0c\u5373\u53ef\u958b\u59cb\u5efa\u7acb\u6a21\u578b\u8207\u8a55\u4f30\u6a21\u578b\u3002\u4f46\u662f\u7576\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u8868\u73fe\u4e0d\u597d\u6709\u5f88\u591a\u7684\u56e0\u7d20\u3002\u5927\u5bb6\u6700\u5e38\u505a\u7684\u662f\u66ff\u63db\u6a21\u578b\u6f14\u7b97\u6cd5\uff0c\u6216\u662f\u5617\u8a66\u4e0d\u540c\u7684\u6a21\u578b\u8d85\u53c3\u6578\u53d6\u5f97\u4e00\u500b\u6700\u4f73\u7684\u7d50\u679c\u3002\u4f46\u662f\u5728\u9032\u884c\u9019\u4e9b\u505a\u6cd5\u4e4b\u524d\uff0c\u5efa\u8b70\u5927\u5bb6\u5148\u628a\u95dc\u6ce8\u7684\u9ede\u56de\u5230\u8cc7\u6599\u8655\u7406\u9762\u3002\u6a21\u578b\u8a13\u7df4\u4e0d\u597d\u7684\u5176\u4e2d\u4e00\u500b\u56e0\u7d20\u662f\u8cc7\u6599\u7684\u6a19\u7c64\u6536\u96c6\u4e0d\u7576\u3002Landing.ai \u57f7\u884c\u9577\u5433\u6069\u9054\u4e5f\u66fe\u7d93\u8aaa\u904e\u7576\u4e00\u500b\u5c0f\u8cc7\u6599\u96c6\u5b58\u5728\u8457\u932f\u8aa4\u6a19\u7c64\u6642\uff0c\u6a21\u578b\u5f88\u96e3\u7d66\u51fa\u4e00\u500b\u6b63\u78ba\u7684\u8f38\u51fa\u3002\u56e0\u70ba\u8cc7\u6599\u9593\u593e\u5e36\u4e86\u96dc\u8a0a\u5f80\u5f80\u6703\u4f7f\u5f97\u6a21\u578b\u5b58\u5728\u8457\u4e00\u4e9b\u504f\u5dee\uff0c\u5c0e\u81f4\u8a13\u7df4\u7d50\u679c\u4e0d\u7a69\u5b9a\u3002\u56e0\u6b64\u7b46\u8005\u5efa\u8b70\u6a21\u578b\u8a13\u7df4\u4e0d\u597d\u7684\u6642\u5019\uff0c\u53ef\u4ee5\u56de\u982d\u89c0\u5bdf\u8cc7\u6599\u662f\u5426\u5b58\u5728\u4e00\u4e9b\u932f\u8aa4\u3002\u800c\u4e0d\u662f\u4e00\u5473\u5730\u8abf\u6574\u6a21\u578b\u6f14\u7b97\u6cd5\u8207\u8d85\u53c3\u6578\u3002 2. 
\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u985e\u5225\u5206\u4f48\u4e0d\u4e00\u81f4 \u5728\u5206\u985e\u7684\u8cc7\u6599\u4e2d\uff0c\u521d\u5b78\u8005\u5e38\u898b\u7684\u932f\u8aa4\u662f\u5fd8\u8a18\u4f7f\u7528\u5206\u5c64\u62bd\u6a23 (stratify) \u4f86\u5c0d\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u9032\u884c\u5207\u5272\u3002\u7576\u6e2c\u8a66\u96c6\u7684\u5206\u4f48\u76e1\u53ef\u80fd\u8207\u8a13\u7df4\u76f8\u540c\u60c5\u6cc1\u4e0b\uff0c\u6a21\u578b\u624d\u66f4\u6709\u53ef\u80fd\u5f97\u5230\u66f4\u6e96\u78ba\u7684\u9810\u6e2c\u3002\u7136\u800c\u5728\u5206\u985e\u7684\u554f\u984c\u4e2d\uff0c\u6211\u5011\u5fc5\u9808\u66f4\u95dc\u6ce8\u6bcf\u500b\u985e\u5225\u7684\u8cc7\u6599\u5206\u4f48\u6bd4\u4f8b\u3002\u4ee5\u4e0b\u8209\u500b\u4f8b\u5b50\uff1a\u5047\u8a2d\u6211\u5011\u6709\u4e09\u500b\u6a19\u7c64\u7684\u985e\u5225\uff0c\u800c\u9019\u4e09\u500b\u985e\u5225\u7684\u5206\u4f48\u6bd4\u4f8b\u5206\u5225\u70ba 4:3:3\u3002\u540c\u7406\u6211\u5011\u5728\u9032\u884c\u8cc7\u6599\u5207\u5272\u7684\u6642\u5019\u5fc5\u9808\u78ba\u4fdd\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u9700\u8981\u6709\u76f8\u540c\u7684\u8cc7\u6599\u5206\u4f48\u6bd4\u4f8b\u3002 \u5927\u5bb6\u61c9\u8a72\u90fd\u4f7f\u7528\u904e Sklearn \u7684 train_test_split \u9032\u884c\u8cc7\u6599\u5207\u5272\u3002\u5728\u6b64\u65b9\u6cd5\u4e2d Sklearn \u63d0\u4f9b\u4e86\u4e00\u500b stratify 
\u53c3\u6578\u9054\u5230\u5206\u5c64\u96a8\u6a5f\u62bd\u6a23\u7684\u76ee\u7684\u3002\u7279\u5225\u662f\u5728\u539f\u59cb\u6578\u64da\u4e2d\u6a23\u672c\u6a19\u7c64\u5206\u4f48\u4e0d\u5747\u8861\u6642\u975e\u5e38\u6709\u7528\uff0c\u4e00\u4e9b\u5206\u985e\u554f\u984c\u53ef\u80fd\u6703\u5728\u76ee\u6a19\u985e\u7684\u5206\u4f48\u4e2d\u8868\u73fe\u51fa\u5f88\u5927\u7684\u4e0d\u5e73\u8861\uff1a\u4f8b\u5982\uff0c\u8ca0\u6a23\u672c\u8207\u6b63\u6a23\u672c\u6bd4\u4f8b\u61f8\u6b8a(\u4fe1\u7528\u5361\u76dc\u5237\u9810\u6e2c\u3001\u96e2\u8077\u54e1\u5de5\u9810\u6e2c)\u3002\u4ee5\u4e0b\u7528\u7d05\u9152\u5206\u985e\u9810\u6e2c\u4f86\u9032\u884c\u793a\u7bc4\uff0c\u9996\u5148\u6211\u5011\u4e0d\u4f7f\u7528 stratify \u96a8\u6a5f\u5207\u5272\u8cc7\u6599\u4e26\u67e5\u770b\u8cc7\u6599\u5207\u5272\u524d\u5f8c\u7684\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b\u3002 from sklearn.datasets import load_wine from sklearn.model_selection import train_test_split X , y = load_wine ( return_X_y = True ) # \u67e5\u770b\u5168\u90e8\u8cc7\u6599\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y ) . value_counts ( normalize = True ) # \u5168\u90e8\u8cc7\u6599\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.398876 0 0.331461 2 0.269663 dtype: float64 # \u5be6\u9a57\u4e00: \u4e0d\u4f7f\u7528 stratify \u9032\u884c\u5207\u5272\u8cc7\u6599 X_train , X_test , y_train , y_test = train_test_split ( X , y ) # \u67e5\u770b\u8a13\u7df4\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y_train ) . value_counts ( normalize = True ) # \u67e5\u770b\u6e2c\u8a66\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y_test ) . 
value_counts ( normalize = True ) # \u8a13\u7df4\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.390977 0 0.330827 2 0.278195 dtype: float64 # \u6e2c\u8a66\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.511111 0 0.266667 2 0.222222 dtype: float64 \u5f9e\u4e0a\u9762\u5207\u51fa\u4f86\u7684\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u53ef\u4ee5\u767c\u73fe\u4e09\u500b\u985e\u5225\u7684\u8cc7\u6599\u5206\u4f48\u6bd4\u4f8b\u90fd\u4e0d\u540c\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u4f7f\u7528 stratify \u53c3\u6578\u518d\u5207\u5272\u4e00\u6b21\u3002 # \u5be6\u9a57\u4e8c: \u4f7f\u7528 stratify \u9032\u884c\u5207\u5272\u8cc7\u6599 X_train , X_test , y_train , y_test = train_test_split ( X , y , stratify = y ) # \u67e5\u770b\u8a13\u7df4\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y_train ) . value_counts ( normalize = True ) # \u67e5\u770b\u6e2c\u8a66\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y_test ) . value_counts ( normalize = True ) # \u8a13\u7df4\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.398496 0 0.330827 2 0.270677 dtype: float64 # \u6e2c\u8a66\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.400000 0 0.333333 2 0.266667 dtype: float64 \u6211\u5011\u53ef\u4ee5\u767c\u73fe\u5c07 stratify \u8a2d\u7f6e\u70ba\u76ee\u6a19 (y) \u5728\u8a13\u7df4\u548c\u6e2c\u8a66\u96c6\u4e2d\u7522\u751f\u76f8\u540c\u7684\u5206\u4f48\u3002\u56e0\u70ba\u6539\u8b8a\u985e\u5225\u7684\u6bd4\u4f8b\u662f\u4e00\u500b\u56b4\u91cd\u7684\u554f\u984c\uff0c\u53ef\u80fd\u6703\u4f7f\u6a21\u578b\u66f4\u504f\u5411\u65bc\u7279\u5b9a\u7684\u985e\u5225\u3002\u56e0\u6b64\u8a13\u7df4\u8cc7\u6599\u7684\u5206\u4f48\u5fc5\u9808\u8981\u8207\u5be6\u969b\u60c5\u6cc1\u8d8a\u63a5\u8fd1\u8d8a\u597d\u3002 3. 
\u6c92\u6709\u8cc7\u6599\u8996\u89ba\u5316\u7684\u7fd2\u6163 \u8cc7\u6599\u8996\u89ba\u5316\u7684\u597d\u8655\u591a\u591a\uff0c\u5728\u672c\u7cfb\u5217\u6587\u7ae0 [Day 3] \u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\uff1f\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427\uff01 \u8207 [Day 22] Python \u8996\u89ba\u5316\u89e3\u91cb\u6578\u64da - Plotly Express \u8b1b\u89e3\u4e86\u8a31\u591a Python \u8cc7\u6599\u8996\u89ba\u5316\u7684\u6280\u5de7\u3002\u8cc7\u6599\u8996\u89ba\u5316\u53ef\u4ee5\u5e6b\u52a9\u6211\u5011\u5206\u6790\u8207\u7d71\u8a08\u8cc7\u6599\u7684\u578b\u614b\uff0c\u5f80\u5f80\u6709\u597d\u7684\u8cc7\u6599\u6e05\u6d17\u8207\u524d\u8655\u7406\u5c0d\u6a21\u578b\u9810\u6e2c\u7d50\u679c\u6703\u6709\u5927\u5e45\u7684\u63d0\u5347\u3002\u6709\u8208\u8da3\u7684\u8b80\u8005\u53ef\u4ee5\u53c3\u8003 \u5b89\u65af\u5eab\u59c6\u56db\u91cd (Anscombe\u2019s quartet) \u3002\u4ed6\u4e3b\u8981\u662f\u900f\u904e\u56db\u500b\u5c0f\u8cc7\u6599\u96c6\u4e26\u900f\u904e\u8996\u89ba\u5316\u8207\u7d71\u8a08\u4f86\u89c0\u5bdf\uff0c\u4e26\u8aaa\u660e\u5728\u5206\u6790\u6578\u64da\u524d\u5148\u7e6a\u88fd\u5716\u8868\u7684\u91cd\u8981\u6027\uff0c\u4ee5\u53ca\u96e2\u7fa4\u503c\u5c0d\u7d71\u8a08\u7684\u5f71\u97ff\u4e4b\u5927\u3002 4. 
\u4f7f\u7528 LabelEncoder \u70ba\u7279\u5fb5\u7de8\u78bc \u901a\u5e38\u6211\u5011\u8981\u70ba\u985e\u5225\u7684\u7279\u5fb5\u9032\u884c\u7de8\u78bc\uff0c\u76f4\u89ba\u6703\u60f3\u5230 Sklearn \u7684 LabelEncoder \u3002\u4f46\u662f\u5982\u679c\u4e00\u500b\u8cc7\u6599\u96c6\u4e2d\u6709\u591a\u500b\u7279\u5fb5\u662f\u5c6c\u65bc\u985e\u5225\u578b\u7684\u8cc7\u6599\uff0c\u8c48\u4e0d\u662f\u5f88\u9ebb\u7169?\u5fc5\u9808\u8981\u4e00\u500b\u4e00\u500b\u547c\u53eb LabelEncoder \u5206\u5225\u70ba\u9019\u4e9b\u7279\u5fb5\u9032\u884c\u8f49\u63db\u3002\u5982\u679c\u4f60\u770b\u5230\u9019\u908a\u6709\u540c\u611f\u7684\uff0c\u5728\u9019\u88e1\u8981\u544a\u8a34\u4f60\u4e8b\u5be6\u4e26\u975e\u5982\u6b64\uff01\u6211\u5011\u770b\u770b \u5728\u5b98\u65b9\u6587\u4ef6\u4e0b LabelEncoder \u7684\u63cf\u8ff0\uff1a This transformer should be used to encode target values, i.e. y, and not the input X. \u7c21\u55ae\u4f86\u8aaa LabelEncoder \u53ea\u662f\u88ab\u7528\u4f86\u7de8\u78bc\u8f38\u51fa\u9805 y \u800c\u5df2\u7684\uff01\u4f60\u9084\u5728\u7528\u5b83\u4f86\u7de8\u78bc\u4f60\u7684\u6bcf\u500b x \u55ce\uff1f\uff08\u6688 \u90a3\u9ebc\u6211\u5011\u8a72\u7528\u4ec0\u9ebc\u65b9\u6cd5\u4f86\u7de8\u78bc\u6709\u9806\u5e8f\u7684\u985e\u5225\u7279\u5fb5\u5462\uff1f\u5982\u679c\u4f60\u4ed4\u7d30\u95b1\u8b80\u6709\u95dc\u7de8\u78bc\u5206\u985e\u7279\u5fb5\u7684 Sklearn \u7528\u6236\u6307\u5357\uff0c\u4f60\u6703\u770b\u5230\u5b83\u6e05\u695a\u5730\u8aaa\u660e\uff1a To convert categorical features to integer codes, we can use the OrdinalEncoder. 
This estimator transforms each categorical feature to one new feature of integers (0 to n_categories - 1) \u770b\u5230\u9019\u908a\u5927\u5bb6\u61c9\u8a72\u77e5\u9053\u95b1\u8b80\u5b98\u65b9\u6587\u4ef6\u7684\u91cd\u8981\u6027\u5427\uff01\u5b98\u65b9\u6587\u4ef6\u4e2d\u5efa\u8b70 x \u9805\u7684\u8f38\u5165\u7279\u5fb5\u53ef\u4ee5\u63a1\u7528 OrdinalEncoder \u4e00\u6b21\u70ba\u6240\u6709\u7279\u5fb5\u4f9d\u5e8f\u505a Label Encoding\u3002OrdinalEncoder \u7de8\u78bc\u5668\u7684\u4f7f\u7528\u65b9\u5f0f\u5982\u4e0b\uff1a from sklearn.preprocessing import OrdinalEncoder enc = OrdinalEncoder () X = [[ 'Male' , 1 ], [ 'Female' , 3 ], [ 'Female' , 2 ]] enc . fit ( X ) print ( enc . categories_ ) enc . transform ([[ 'Female' , 3 ], [ 'Male' , 1 ]]) [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)] array([[0., 2.], [1., 0.]]) \u4ee5\u4e0a\u7684\u7bc4\u4f8b\u662f X \u6709\u4e09\u7b46\u8cc7\u6599\uff0c\u6bcf\u7b46\u8cc7\u6599\u90fd\u6709\u5169\u500b\u7279\u5fb5\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7b2c\u4e00\u500b\u7279\u5fb5\u662f\u6027\u5225 Male \u8207 Female\uff0c\u56e0\u6b64 OrdinalEncoder \u6703\u4f9d\u7167\u5b57\u6bcd\u958b\u982d\u505a\u6392\u5e8f Female \u7de8\u78bc\u70ba 0 \u800c Male \u7de8\u78bc\u70ba 1\u3002\u53e6\u5916\u7b2c\u4e8c\u500b\u7279\u5fb5\u70ba\u6578\u5b57 1\u30012\u30013\uff0c\u540c\u7406\u4f9d\u5e8f\u70ba\u4ed6\u5011\u7de8\u78bc\u6210 0\u30011\u30012\u3002\u53ea\u9700\u95b1\u8b80\u5b98\u65b9\u6587\u6a94\u548c\u7528\u6236\u6307\u5357\uff0c\u4f60\u5c31\u53ef\u4ee5\u4e86\u89e3\u5f88\u591a\u95dc\u65bc Sklearn \u7684\u77e5\u8b58\uff01\u662f\u4e0d\u662f\u5f88\u68d2\uff5e 5. 
\u8cc7\u6599\u8655\u7406\u4e0d\u7576\u5c0e\u81f4\u8cc7\u6599\u6d29\u6f0f \u8cc7\u6599\u6d29\u6f0f (data leakage) \u662f\u500b\u96b1\u5f62\u6bba\u624b\uff0c\u5b83\u6703\u5728\u4e0d\u77e5\u4e0d\u89ba\u4e2d\u5f71\u97ff\u6a21\u578b\u9810\u6e2c\u7d50\u679c\u3002\u5176\u767c\u751f\u7684\u6642\u6a5f\u5728\u65bc\u4f60\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\uff0c\u4e0d\u61c9\u8a72\u5c07\u6e2c\u8a66\u7684\u8cc7\u6599\u7684\u8cc7\u8a0a\u6d29\u6f0f\u5230\u8a13\u7df4\u904e\u7a0b\u4e2d\u3002\u5b83\u6703\u9020\u6210\u6a21\u578b\u7d66\u51fa\u4e00\u500b\u975e\u5e38\u6a02\u89c0\u7684\u7d50\u679c\uff0c\u5373\u4f7f\u5728\u4ea4\u53c9\u9a57\u8b49\u4e2d\u4e5f\u662f\u5982\u6b64\uff0c\u4f46\u5728\u5c0d\u5be6\u969b\u65b0\u6578\u64da\u9032\u884c\u6e2c\u8a66\u6642\u8868\u73fe\u6703\u975e\u5e38\u5730\u7cdf\u7cd5\u3002 \u8cc7\u6599\u6d29\u6f0f\u6700\u5e38\u767c\u751f\u65bc\u8cc7\u6599\u524d\u8655\u7406\u7684\u968e\u6bb5\uff0c\u5c24\u5176\u662f\u7576\u4f60\u7684\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u5c1a\u672a\u5207\u5272\u7684\u6642\u5019\u3002Sklearn \u63d0\u4f9b\u4e86\u8a31\u591a\u8cc7\u6599\u524d\u8655\u7406\u7684\u65b9\u6cd5\uff0c\u4f8b\u5982: \u7f3a\u5931\u503c\u88dc\u503c(imputers)\u3001\u6b63\u898f\u5316 (normalizers)\u3001\u6a19\u6e96\u5316(standardization)\u4ee5\u53ca\u5c0d\u6578(log) \u8f49\u63db...\u7b49\u3002\u9019\u4e9b\u8f49\u63db\u5668\u90fd\u6703\u4f9d\u8cf4\u65bc\u4f60\u8f38\u5165\u8cc7\u6599\u7684\u5206\u4f48\uff0c\u4e26\u4f9d\u7167\u6b64\u5206\u4f48\u505a\u76f8\u5c0d\u61c9\u7684\u64ec\u5408\u3002 \u8209\u4f8b\u4f86\u8aaa\uff0c\u6211\u5011\u5728\u505a\u6a19\u6e96\u5316\u6642(StandardScaler)\u900f\u904e\u5f9e\u6bcf\u7b46\u8cc7\u6599\u4e2d\u6e1b\u53bb\u5e73\u5747\u503c\u4e26\u5c07\u5176\u9664\u4ee5\u6a19\u6e96\u504f\u5dee\u4f86\u7372\u5f97\u7e2e\u653e\u5f8c\u7684\u6578\u64da\u3002\u6211\u5011\u4f7f\u7528 fit() \u65b9\u6cd5\u5728\u6240\u6709\u8cc7\u6599\u96c6 X 
\u4e0a\u505a\u8f49\u63db\uff0c\u4e26\u4f7f\u5f97\u8f49\u63db\u5668\u5b78\u7fd2\u6bcf\u500b\u7279\u5fb5\u7684\u6574\u500b\u5206\u4f48\u7684\u5e73\u5747\u503c\u548c\u6a19\u6e96\u5dee\u3002\u9019\u4e9b\u8cc7\u6599\u8f49\u63db\u5f8c\u5982\u679c\u518d\u5c07\u9019\u4e9b\u6578\u64da\u62c6\u5206\u70ba\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\uff0c\u5247\u8a13\u7df4\u96c6\u6703\u53d7\u5230\u6c61\u67d3\u3002\u56e0\u70ba StandardScaler \u5f9e\u5be6\u969b\u5206\u4f48\u4e2d\u6d29\u9732\u4e86\u6e2c\u8a66\u96c6\u91cd\u8981\u8a0a\u606f\uff0c\u4e00\u822c\u4f86\u8aaa\u6211\u5011\u4e0d\u80fd\u5c07\u6e2c\u8a66\u96c6\u7684\u5206\u4f48\u60c5\u6cc1\u8207\u8a13\u7df4\u96c6\u6df7\u5728\u4e00\u8d77\u3002\u96d6\u7136\u6211\u5011\u5e0c\u671b\u8a13\u7df4\u96c6\u7684\u5206\u4f48\u8207\u5be6\u969b\u6e2c\u8a66\u96c6\u7684\u5206\u4f48\u8981\u8d8a\u63a5\u8fd1\u8d8a\u597d\uff0c\u56e0\u70ba\u4f7f\u5f97\u6a21\u578b\u8868\u73fe\u7d50\u679c\u7a69\u5b9a\u3002 \u96d6\u7136\u6211\u5011\u628a\u6e2c\u8a66\u96c6\u8207\u8a13\u7df4\u96c6\u6df7\u5728\u4e00\u8d77\u4e26\u505a\u8f49\u63db\uff0c\u9019\u4e00\u6b65\u9a5f\u5c0d\u6211\u5011\u4f86\u8aaa\u53ef\u80fd\u6c92\u4ec0\u9ebc\u3002\u4f46\u662f\u5c0d\u65bc Sklearn \u5f37\u5927\u7684\u6f14\u7b97\u6cd5\uff0c\u53ef\u80fd\u6703\u900f\u904e\u9019\u500b\u907a\u6f0f\u6e2c\u8a66\u96c6\u7684\u5206\u4f48\u7684\u8a0a\u606f\u628a\u6a21\u578b\u64ec\u5408\u7684\u5f88\u597d\u3002\u5c46\u6642\u6a21\u578b\u8a13\u7df4\u5b8c\u6210\u5f8c\uff0c\u6e2c\u8a66\u96c6\u4e0d\u5920\u65b0\u7a4e\uff0c\u7121\u6cd5\u5728\u5be6\u969b\u770b\u4e0d\u898b\u7684\u6578\u64da\u4e0a\u6e2c\u8a66\u6a21\u578b\u7684\u6027\u80fd\u3002 \u6700\u7c21\u55ae\u7684\u89e3\u6c7a\u8fa6\u6cd5\uff0c\u5c31\u662f\u4e0d\u8981\u4f7f\u7528 fit() 
\u4e00\u6b21\u8f49\u63db\u6240\u6709\u7684\u8cc7\u6599\u3002\u5728\u505a\u4efb\u4f55\u8cc7\u6599\u8f49\u63db\u4e4b\u524d\u8981\u5148\u78ba\u4fdd\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u5df2\u7d93\u5b8c\u6574\u5730\u88ab\u5207\u958b\u3002\u5373\u4f7f\u5207\u958b\u5f8c\u4e5f\u4e0d\u8981\u518d\u62ff\u6e2c\u8a66\u96c6\u547c\u53eb fit() \u6216 fit_transform() \uff0c\u9019\u4e00\u6a23\u6703\u5c0e\u81f4\u76f8\u540c\u554f\u984c\u767c\u751f\u3002\u56e0\u70ba\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u5fc5\u9808\u9032\u884c\u76f8\u540c\u7684\u8f49\u63db\uff0c\u4f9d\u7167\u5b98\u65b9\u7684\u7bc4\u4f8b\u6211\u5011\u5fc5\u9808\u5148\u4f7f\u7528 fit_transform() \u5728\u8a13\u7df4\u96c6\u4e0a\u9032\u884c\u64ec\u5408\u8207\u8f49\u63db\u3002\u9019\u78ba\u4fdd\u4e86\u8f49\u63db\u5668\u50c5\u5f9e\u8a13\u7df4\u96c6\u5b78\u7fd2\uff0c\u5f9e\u4e2d\u627e\u51fa\u53c3\u6578\u4f8b\u5982\u5e73\u5747\u503c\u8207\u8b8a\u7570\u6578\u4e26\u540c\u6642\u5c0d\u5176\u9032\u884c\u8b8a\u63db\u3002\u63a5\u8457\u4f7f\u7528 transform() \u65b9\u6cd5\u5728\u6e2c\u8a66\u8cc7\u6599\u4e0a\u9032\u884c\u8f49\u63db\uff0c\u6839\u64da\u5f9e\u8a13\u7df4\u6578\u64da\u4e2d\u5b78\u5230\u7684\u8a0a\u606f\u9032\u884c\u8f49\u63db\u3002 from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler X , y = load_iris ( return_X_y = True ) X_train , X_test , y_train , y_test = train_test_split ( X , y , stratify = y , random_state = 44 ) scaler = StandardScaler () X_train_scaled = scaler . fit_transform ( X_train ) X_test_scaled = scaler . 
transform ( X_test ) \u66f4\u5f37\u5927\u7684\u89e3\u6c7a\u65b9\u6848\u662f\u4f7f\u7528 Sklearn \u5167\u5efa\u7684 pipeline\uff0c\u5b83\u80fd\u5920\u4fdd\u8b77\u6a21\u578b\u514d\u65bc\u8cc7\u6599\u6d29\u6f0f\u7684\u554f\u984c\u3002\u6b64\u65b9\u6cd5\u80fd\u5920\u78ba\u4fdd\u8a13\u7df4\u8cc7\u6599\u50c5\u53c3\u8207\u8f49\u63db\u64ec\u5408\u8207\u6a21\u578b\u8a13\u7df4\uff0c\u800c\u6e2c\u8a66\u8cc7\u6599\u50c5\u7528\u65bc\u8a08\u7b97\u4e26\u9a57\u8b49\u6a21\u578b\u3002 6. \u50c5\u4f7f\u7528\u6e2c\u8a66\u96c6\u8a55\u4f30\u6a21\u578b\u597d\u58de \u5982\u679c\u4f60\u7684\u6e2c\u8a66\u8cc7\u6599 R2 score \u5f97\u5230\u4e86 0.85 \u5c31\u4ee3\u8868\u5f88\u597d\u4e86\u55ce\uff1f\u4e0d\u76e1\u7136\uff01\u5118\u7ba1\u6709\u9ad8\u7684\u6e2c\u8a66\u5206\u6578\u901a\u5e38\u610f\u5473\u8457\u6a21\u578b\u8868\u73fe\u4f73\uff0c\u4f46\u5728\u89e3\u91cb\u6e2c\u8a66\u7d50\u679c\u6642\u4ecd\u6709\u4e00\u4e9b\u91cd\u8981\u7684\u6ce8\u610f\u4e8b\u9805\u3002\u9996\u5148\u6700\u91cd\u8981\u7684\uff0c\u7121\u8ad6\u5206\u6578\u503c\u5982\u4f55\u6e2c\u8a66\u96c6\u7684\u5206\u6578\u4e00\u5b9a\u8981\u8207\u8a13\u7df4\u96c6\u76f8\u6bd4\u8f03\u624d\u80fd\u78ba\u4fdd\u6a21\u578b\u8a13\u7df4\u597d\u8207\u58de\u3002\u7576\u4f60\u7684\u6a21\u578b\u8a13\u7df4\u96c6\u5206\u6578\u9ad8\u65bc\u6e2c\u8a66\u96c6\u7684\u5206\u6578\uff0c\u4e26\u4e14\u5169\u8005\u90fd\u8db3\u5920\u9ad8\u4ee5\u6eff\u8db3\u5c08\u6848\u7684\u76ee\u6a19\u671f\u671b\u6642\u9019\u4ee3\u8868\u4f60\u8a13\u7df4\u4e86\u4e00\u500b\u597d\u6a21\u578b\u3002\u7136\u800c\u9019\u4e26\u4e0d\u610f\u5473\u8457\u8a13\u7df4\u548c\u6e2c\u8a66\u5206\u6578\u4e4b\u9593\u7684\u5dee\u7570\u8d8a\u5927\u8d8a\u597d\u3002\u8209\u500b\u4f8b\u5b50\uff0c\u82e5\u8a13\u7df4\u96c6\u7684 R2 score \u70ba 0.85 \u6e2c\u8a66\u96c6\u70ba 0.8 \u5373\u4ee3\u8868\u6a21\u578b\u65e2\u4e0d\u904e\u5ea6\u64ec\u5408(overfit)\u4e5f\u4e0d\u6b20\u64ec\u5408(underfit)\u3002\u4f46\u662f\u5982\u679c\u8a13\u7df4\u96c6 0.9 \u6e2c\u8a66\u96c6 0.8 
\u7684\u6642\u5019\uff0c\u4f60\u7684\u6a21\u578b\u5c31\u662f\u904e\u64ec\u5408\u3002\u5176\u539f\u56e0\u662f\u8a72\u6a21\u578b\u6c92\u6709\u5728\u8a13\u7df4\u671f\u9593\u9032\u884c\u6cdb\u5316\uff0c\u800c\u662f\u8a18\u4f4f\u4e86\u4e00\u4e9b\u8a13\u7df4\u6578\u64da\uff0c\u5f9e\u800c\u5c0e\u81f4\u6e2c\u8a66\u5206\u6578\u4f4e\u5f97\u591a\u3002 \u5728\u5927\u591a\u6578\u4efb\u52d9\u4e2d\u4f60\u5c07\u6703\u770b\u5230\u8a31\u591a\u4eba\u4f7f\u7528 tree-based \u6a21\u578b\u6216\u662f\u6574\u9ad4\u5b78\u7fd2\u6a21\u578b (ensemble models)\u3002\u4f8b\u5982\u5728\u96a8\u6a5f\u68ee\u6797\u6f14\u7b97\u6cd5\u7576\u4e2d\u5982\u679c\u5b83\u5011\u7684\u6a39\u6df1\u5ea6\u592a\u6df1\uff0c\u5f80\u5f80\u6703\u7372\u5f97\u975e\u5e38\u9ad8\u7684\u8a13\u7df4\u5206\u6578\uff0c\u5f9e\u800c\u5c0e\u81f4\u904e\u5ea6\u64ec\u5408\u3002\u53e6\u5916\u4e5f\u6709\u6e2c\u8a66\u96c6\u7684\u5206\u6578\u6bd4\u8a13\u7df4\u96c6\u9ad8\u7684\u60c5\u6cc1\uff0c\u82e5\u767c\u751f\u6b64\u60c5\u6cc1\u6642\u901a\u5e38\u90fd\u6703\u611f\u89ba\u662f\u4e0d\u662f\u505a\u932f\u4e86\u4ec0\u9ebc\u3002\u9019\u7a2e\u60c5\u6cc1\u7684\u4e3b\u8981\u539f\u56e0\u662f\u8cc7\u6599\u6d29\u6f0f\uff0c\u4e5f\u5c31\u662f\u4e0a\u4e00\u7bc0\u6211\u5011\u8a0e\u8ad6\u7684\u60c5\u6cc1\u3002\u6216\u662f\u4f60\u7684\u6e2c\u8a66\u8cc7\u6599\u7b46\u6578\u592a\u5c11\uff0c\u6c92\u8fa6\u6cd5\u8db3\u4ee5\u9a57\u8b49\u6a21\u578b\u597d\u58de\u3002 \u53e6\u5916\u6709\u6642\u5019\u6211\u5011\u4e5f\u6703\u5f97\u5230\u5728\u8a13\u7df4\u96c6\u6709\u5f88\u597d\u7684\u8868\u73fe\u4f46\u6e2c\u8a66\u96c6\u7121\u6575\u5dee\u7684\u60c5\u6cc1\u3002\u7576\u8a13\u7df4\u548c\u6e2c\u8a66\u5206\u6578\u5dee\u7570\u5f88\u5927\u6642\uff0c\u554f\u984c\u5f80\u5f80\u8207\u6e2c\u8a66\u96c6\u6709\u95dc\u800c\u4e0d\u662f\u904e\u5ea6\u64ec\u5408\u3002\u9019\u6642\u5019\u4f60\u53ef\u80fd\u8981\u6aa2\u67e5\u8cc7\u6599\u9810\u8655\u7406\u7684\u65b9\u5f0f\u662f\u5426\u4e00\u81f4 (\u50cf\u662f\u53d6 log \u6216 
scale)\uff0c\u6216\u662f\u53ea\u662f\u5fd8\u8a18\u5c0d\u6e2c\u8a66\u96c6\u505a\u8f49\u63db\u8655\u7406\u3002 \u9019\u88e1\u505a\u4e00\u500b\u5c0f\u7d50\uff0c\u7e3d\u4e4b\u5728\u8a13\u7df4\u597d\u6a21\u578b\u6642\u8acb\u4ed4\u7d30\u6aa2\u67e5\u8a13\u7df4\u548c\u6e2c\u8a66\u5206\u6578\u4e4b\u9593\u7684\u5dee\u8ddd\u3002\u4e26\u4e14\u53ef\u4ee5\u900f\u904e\u6b64\u8a55\u4f30\u65b9\u5f0f\u6aa2\u8996\u6a21\u578b\u662f\u5426\u904e\u64ec\u5408\uff0c\u540c\u6642\u4e5f\u80fd\u9032\u884c\u6a21\u578b\u689d\u53c3\u6216\u662f\u9078\u64c7\u6700\u4f73\u7684\u8cc7\u6599\u9810\u8655\u7406\u65b9\u5f0f\u3002\u4e26\u70ba\u6700\u7d42\u7684\u6a21\u578b\u505a\u6700\u4f73\u7684\u6e96\u5099\u3002 7. \u5728\u6c92\u6709\u4ea4\u53c9\u9a57\u8b49\u7684\u60c5\u6cc1\u4e0b\u5224\u65b7\u6a21\u578b\u6027\u80fd \u6211\u60f3\u5927\u5bb6\u61c9\u8a72\u90fd\u719f\u7df4\u638c\u63e1\u4e86 overfitting \u9019\u500b\u8b70\u984c\u3002\u9019\u662f\u6a5f\u5668\u5b78\u7fd2\u4e2d\u4e00\u500b\u8feb\u5207\u554f\u984c\uff0c\u4e26\u5df2\u7d93\u8a2d\u8a08\u4e86\u7121\u6578\u500b\u65b9\u6cd5\u4f86\u89e3\u6c7a\u5b83\u3002\u6700\u57fa\u672c\u7684\u65b9\u6cd5\u662f\u5c07\u4e00\u90e8\u5206\u6578\u64da\u4f5c\u70ba\u6e2c\u8a66\u96c6\u4f86\u6a21\u64ec\u548c\u6e2c\u91cf\u6a21\u578b\u5728\u770b\u4e0d\u898b\u7684\u6578\u64da\u4e0a\u7684\u6027\u80fd\u3002\u4f46\u662f\u6211\u5011\u53ef\u4ee5\u8abf\u6574\u6a21\u578b\u7684\u8d85\u53c3\u6578\uff0c\u76f4\u5230\u6a21\u578b\u5728\u8a72\u7279\u5b9a\u6e2c\u8a66\u96c6\u4e0a\u9054\u5230\u6700\u9ad8\u5206\u6578\uff0c\u9019\u53c8\u610f\u5473\u8457\u67d0\u7a2e\u542b\u7fa9\u7684\u904e\u5ea6\u64ec\u5408\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u6703\u5c07\u5b8c\u6574\u6578\u64da\u7684\u53e6\u4e00\u90e8\u5206\u4f5c\u70ba \u9a57\u8b49\u96c6 
\u518d\u6b21\u89e3\u6c7a\u9019\u500b\u554f\u984c\u3002\u6a21\u578b\u5c07\u5728\u8a13\u7df4\u6578\u64da\u4e0a\u9032\u884c\u8a13\u7df4\uff0c\u4e26\u5728\u9a57\u8b49\u96c6\u4e0a\u5fae\u8abf\u5176\u53c3\u6578\uff0c\u4e26\u5728\u6e2c\u8a66\u96c6\u4e0a\u9032\u884c\u6700\u7d42\u8a55\u4f30\u3002 \u4f46\u662f\u5c07\u6211\u5011\u5bf6\u8cb4\u7684\u6578\u64da\u5206\u6210\u4e09\u7d44\u610f\u5473\u8457\u6a21\u578b\u53ef\u4ee5\u5b78\u7fd2\u7684\u6578\u64da\u91cf\u66f4\u5c11\u3002\u6b64\u5916\u6a21\u578b\u7684\u6574\u9ad4\u9810\u6e2c\u6027\u80fd\u5c07\u53d6\u6c7a\u65bc\u90a3\u5c0d\u7279\u5b9a\u7684\u8a13\u7df4\u96c6\u548c\u9a57\u8b49\u96c6\u3002\u56e0\u6b64\u5728\u9032\u884c\u6a5f\u5668\u5b78\u7fd2\u6642\u6700\u5e38\u4f7f\u7528 K-Fold cross-validation \u89e3\u6c7a\u4e0a\u8ff0\u554f\u984c\u3002\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u6211\u7684\u524d\u5169\u5929\u6587\u7ae0 [Day 25] \u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb \u4ee5\u53ca [Day 26] \u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation \u3002\u6839\u64da\u6211\u5011\u8a2d\u5b9a\u7684 K \u503c\uff0c\u53ef\u4ee5\u5b8c\u6574\u7684\u5c07\u6578\u64da\u88ab\u5206\u6210 K \u7d44 folds\uff0c\u5c0d\u65bc\u6bcf\u500b folds \u6bcf\u6b21\u6a21\u578b\u8a13\u7df4\u6703\u628a K-1 \u7d44\u4f5c\u70ba\u8a13\u7df4\u96c6\uff0c\u800c\u5269\u4e0b\u7684\u88ab\u6b78\u985e\u70ba\u9a57\u8b49\u96c6\u3002\u7576\u6a21\u578b\u4ea4\u53c9\u9a57\u8b49\u7d50\u675f\u5f8c\uff0c\u8a13\u7df4\u96c6\u6240\u6709\u8cc7\u6599\u6703\u88ab\u5b8c\u6574\u7684\u8a13\u7df4\u3002 8. 
\u5206\u985e\u554f\u984c\u50c5\u4f7f\u7528\u6e96\u78ba\u7387\u4f5c\u70ba\u8861\u91cf\u6a21\u578b\u7684\u6307\u6a19 \u5728\u9810\u8a2d\u7684\u60c5\u6cc1\u4e0b\u6240\u6709 Sklearn \u5206\u985e\u5668\u5728\u547c\u53eb score() \u51fd\u6578\u6642\u90fd\u4f7f\u7528\u6e96\u78ba\u5ea6\u4f5c\u70ba\u8a55\u5206\u65b9\u6cd5\u3002\u7531\u65bc\u6e96\u78ba\u7387\u7684\u8a08\u7b97\u65b9\u5f0f\u7c21\u55ae\u8207\u5bb9\u6613\u7406\u89e3\uff0c\u56e0\u6b64\u7d93\u5e38\u6703\u770b\u5230\u521d\u5b78\u8005\u5ee3\u6cdb\u4f7f\u7528\u5b83\u4f86\u5224\u65b7\u5176\u6a21\u578b\u7684\u6027\u80fd\u3002\u4e0d\u5e78\u7684\u662f\u9019\u7a2e\u4e00\u822c\u6e96\u78ba\u7387\u7684\u8a55\u4f30\u65b9\u5f0f\u53ea\u5c0d\u985e\u5225\u5e73\u8861\u7684\u4e8c\u5143\u5206\u985e\u554f\u984c\u6709\u7528\u3002 \u7136\u800c\u5728\u5176\u4ed6\u7684\u72c0\u6cc1\u4e0b\u5b83\u662f\u4e00\u500b\u8aa4\u5c0e\u6027\u7684\u6307\u6a19\uff0c\u5373\u4f7f\u662f\u8868\u73fe\u6700\u5dee\u7684\u6a21\u578b\u4e5f\u53ef\u80fd\u80cc\u5f8c\u96b1\u85cf\u8457\u9ad8\u6e96\u78ba\u5ea6\u7684\u5206\u6578\u3002\u8209\u4f8b\u4f86\u8aaa\u6709\u500b\u5075\u6e2c\u5783\u573e\u90f5\u4ef6\u7684\u6a21\u578b\u5b83\u7684\u6e96\u78ba\u7387 90%\uff0c\u4f46\u662f\u5be6\u969b\u4e0a\u5b83\u6839\u672c\u7121\u6cd5\u5075\u6e2c\u5230\u5783\u573e\u90f5\u4ef6\u3002\u9019\u662f\u70ba\u4ec0\u9ebc\uff1f\u7531\u65bc\u5783\u573e\u90f5\u4ef6\u4e26\u4e0d\u5e38\u898b\uff0c\u5206\u985e\u5668\u53ef\u4ee5\u6aa2\u6e2c\u6240\u6709\u975e\u5783\u573e\u90f5\u4ef6\uff0c\u5373\u4f7f\u5206\u985e\u5668\u5b8c\u5168\u7121\u6cd5\u9054\u5230\u5176\u76ee\u7684\u9019\u4e5f\u53ef\u4ee5\u63d0\u9ad8\u5176\u6e96\u78ba\u6027\u3002\u56e0\u70ba\u9019\u500b\u5206\u985e\u5668\u50c5\u53ef\u4ee5\u5206\u985e\u9019\u4e9b\u6b63\u5e38\u90f5\u4ef6\uff0c\u7a00\u5c11\u7684\u5783\u573e\u90f5\u4ef6\u6839\u672c\u8b8a\u8a8d\u4e0d\u51fa\u4f86\u3002 
\u5c0d\u65bc\u591a\u5143\u985e\u5206\u985e\u7684\u554f\u984c\u66f4\u662f\u61c9\u8a72\u6ce8\u610f\u4f60\u7684\u6a21\u578b\u8a55\u4f30\u6307\u6a19\u3002\u5982\u679c\u9054\u5230 80% \u7684\u6e96\u78ba\u7387\uff0c\u662f\u5426\u610f\u5473\u8457\u6a21\u578b\u5728\u9810\u6e2c\u985e\u52251\u3001\u985e\u52252\u3001\u985e\u52253\u751a\u81f3\u6240\u6709\u985e\u6642\u4e00\u6a23\u6e96\u78ba\u5462\uff1f\u4e00\u822c\u7684\u6e96\u78ba\u7387\u6c38\u9060\u7121\u6cd5\u56de\u7b54\u6b64\u985e\u554f\u984c\uff0c\u4f46\u5e78\u904b\u7684\u662f\u5176\u4ed6\u5206\u985e\u6307\u6a19\u63d0\u4f9b\u4e86\u66f4\u591a\u7684\u8a0a\u606f\u6307\u6a19\u3002\u5b83\u5c31\u662f \u6df7\u6dc6\u77e9\u9663 (confusion matrix)\u3002 from sklearn.metrics import confusion_matrix y_true = [ 2 , 0 , 2 , 2 , 0 , 1 ] y_pred = [ 0 , 0 , 2 , 2 , 0 , 2 ] confusion_matrix ( y_true , y_pred ) array([[2, 0, 0], [0, 0, 1], [1, 0, 2]]) \u7d44\u6210\u6df7\u6dc6\u77e9\u9663\u7684\u56db\u500b\u5143\u7d20\u5206\u5225\u6709 TP\u3001TN\u3001FP\u3001FN\u3002\u57fa\u672c\u4e0a\u6df7\u6dc6\u77e9\u9663\u6703\u62ff\u9019\u56db\u500b\u6307\u6a19\u505a\u53c3\u8003\uff0c\u540c\u6642\u7b97\u51fa\u4f86\u7684\u5206\u6578\u4e5f\u66f4\u80fd\u53bb\u8a55\u4f30\u4f60\u7684\u6a21\u578b\u8a13\u7df4\u7684\u7d50\u679c\u3002\u6b64\u5916\u6211\u5011\u53ef\u4ee5\u5229\u7528\u6df7\u6dc6\u77e9\u9663\u4f86\u8a08\u7b97 Precision\u3001Recall\u3001Accuracy \u7b49\u5206\u6578\u3002 TP(True Positive): \u6b63\u78ba\u9810\u6e2c\u6210\u529f\u7684\u6b63\u6a23\u672c\uff0c\u4f8b\u5982\u771f\u5be6\u7b54\u6848(Ground True)\u662f\u8c93\uff0c\u6210\u529f\u7684\u628a\u4e00\u5f35\u8c93\u7684\u7167\u7247\u9810\u6e2c\u6210\u8c93\uff0c\u5373\u70baTP TN(True Negative): \u6b63\u78ba\u9810\u6e2c\u6210\u529f\u7684\u8ca0\u6a23\u672c\uff0c\u6210\u529f\u7684\u628a\u4e00\u5f35\u72d7\u7684\u7167\u7247\u6a19\u793a\u6210\u4e0d\u662f\u8c93\uff0c\u5373\u70baTN FP(False Positive): 
\u932f\u8aa4\u9810\u6e2c\u6210\u6b63\u6a23\u672c\uff0c\u5be6\u969b\u4e0a\u70ba\u8ca0\u6a23\u672c\uff0c\u4f8b\u5982\uff1a\u932f\u8aa4\u7684\u628a\u4e00\u5f35\u72d7\u7684\u7167\u7247\u9810\u6e2c\u6210\u8c93 FN(False Negative): \u932f\u8aa4\u9810\u6e2c\u6210\u8ca0\u6a23\u672c\uff0c\u5be6\u969b\u4e0a\u70ba\u6b63\u6a23\u672c\uff0c\u4f8b\u5982\uff1a\u932f\u8aa4\u7684\u628a\u4e00\u5f35\u8c93\u7684\u7167\u7247\u9810\u6e2c\u6210\u4e0d\u662f\u8c93 9. \u8ff4\u6b78\u554f\u984c\u50c5\u4f7f\u7528 R2 \u5206\u6578\u8a55\u4f30\u6a21\u578b\u597d\u58de \u5728\u9810\u6e2c\u9023\u7e8c\u6027\u6578\u503c\u8f38\u51fa\u7684\u8ff4\u6b78\u6a21\u578b\u4e2d\uff0c\u5927\u5bb6\u5f80\u5f80\u6703\u76f4\u63a5\u547c\u53eb\u6a21\u578b\u63d0\u4f9b\u7684\u8a55\u4f30\u65b9\u6cd5\u76f4\u63a5\u8a08\u7b97 score \u3002\u7136\u800c\u9019\u500b\u5206\u6578\u5728\u8ff4\u6b78\u6a21\u578b\u4e2d\u662f\u8a08\u7b97 R2 \u5206\u6578\uff0c\u53c8\u7a31\u5224\u5b9a\u4fc2\u6578 (coefficient of determination)\u3002\u6240\u8b02\u7684\u5224\u5b9a\u4fc2\u6578\u662f\u8f38\u5165\u7279\u5fb5 (x) \u53bb\u89e3\u91cb\u8f38\u51fa (y) \u7684\u8b8a\u7570\u7a0b\u5ea6\u6709\u591a\u5c11\uff0c\u5176\u8a08\u7b97\u516c\u5f0f\u662f\uff1a\u8ff4\u6b78\u6a21\u578b\u7684\u8b8a\u7570\u91cf (SSR)/\u7e3d\u8b8a\u7570\u91cf (TSS) \u3002\u7528\u4ee5\u4e0b\u8b8a\u7570\u6578\u5206\u6790\u8868\uff08ANOVA table\uff09\u4f86\u8aaa TSS \u5c31\u662f\u8a08\u7b97\u7e3d\u8b8a\u7570\uff0c\u628a\u6bcf\u500b\u5be6\u969b\u7684 y \u6e1b\u53bb\u5e73\u5747\u6578\u7684\u5e73\u65b9\u52a0\u7e3d\u8d77\u4f86\u3002\u800c SSR \u5c31\u662f\u628a\u6240\u6709\u7684\u6a21\u578b\u9810\u6e2c y \u6e1b\u53bb\u5e73\u5747\u6578\u7684\u5e73\u65b9\u52a0\u7e3d\u8d77\u4f86\u3002\u5982\u679c R2 \u5206\u6578\u5f88\u9ad8\u8d8a\u63a5\u8fd1 1\uff0c\u8868\u793a\u6a21\u578b\u7684\u89e3\u91cb\u80fd\u529b\u5f88\u9ad8\u3002 \u5728\u5b78\u8853\u7814\u7a76\u4e0a\u6700\u76f4\u89ba\u7684\u89c0\u5ff5\u662f R2 \u5206\u6578\u6108\u63a5\u8fd1 1 
\u8d8a\u597d\uff0c\u4e5f\u6709\u4e9b\u4eba\u900f\u904e\u4e00\u4e9b\u624b\u6bb5\u4f86\u88fd\u9020 R2 \u5206\u6578\u5f88\u9ad8\u7684\u5047\u8c61\uff0c\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u9019\u7bc7 \u6587\u7ae0 \u3002\u5176\u5be6\u53ea\u900f\u904e R2 \u500b\u8a55\u4f30\u6307\u6a19\u5c31\u4f86\u6c7a\u5b9a\u4e00\u500b\u6a21\u578b\u7684\u597d\u58de\u662f\u4e0d\u592a\u597d\u7684\u7fd2\u6163\u3002\u66f4\u9032\u4e00\u6b65\u53ef\u4ee5\u4f7f\u7528 MSE\u3001MAE \u7b49\u6b98\u5dee\u7684\u8a55\u4f30\u503c\u6a19\u4f86\u770b\u6bcf\u7b46\u8cc7\u6599\u5be6\u969b\u503c\u8207\u9810\u6e2c\u503c\u7684\u8aa4\u5dee\u3002\u6216\u662f\u4f7f\u7528\u76f8\u5c0d\u8aa4\u5dee\u4f86\u89c0\u5bdf\u9810\u6e2c\u6a21\u578b\u7684\u53ef\u4fe1\u5ea6\u3002\u6b64\u5916\u7b46\u8005\u9084\u5efa\u8b70\u53ef\u4ee5\u8a66\u8457\u628a\u6bcf\u7b46\u8cc7\u6599\u7684\u771f\u5be6 y \u8207\u6a21\u578b\u9810\u6e2c\u7684 \u0177 \u7e6a\u88fd\u51fa\u4f86\uff0c\u82e5\u5448\u73fe\u4e00\u689d\u660e\u986f\u7684\u7531\u5de6\u4e0b\u5230\u53f3\u4e0a\u659c\u76f4\u7dda\uff0c\u5247\u8868\u793a\u6a21\u578b\u6240\u9810\u6e2c\u7684\u7d50\u679c\u8207\u771f\u5be6\u7b54\u6848\u5f88\u76f8\u8fd1\u3002 10. 
\u4efb\u4f55\u4e8b\u60c5\u5225\u6025\u8457\u60f3\u7528 AI \u89e3\u6c7a \u8fd1\u5e7e\u5e74 AI \u7684\u767c\u5c55\u60f3\u5fc5\u5927\u5bb6\u6709\u76ee\u5171\u7779\uff0c\u5f9e\u5f71\u50cf\u8b58\u5225\u5230\u7269\u4ef6\u8fa8\u8b58\u7684\u6280\u8853\u6709\u8457\u91cd\u5927\u7684\u9032\u5c55\u3002\u6b64\u5916 2016 \u5e74 Google Deepmind \u5718\u968a\u7684 AlphaGo \u9996\u5ea6\u6253\u6557\u4eba\u985e\uff0c\u9019\u4e5f\u5728\u4eba\u6a5f\u5c0d\u5f08\u4e0a\u958b\u555f\u4e86\u4e00\u9805\u91cd\u8981\u7684\u91cc\u7a0b\u7891\u3002\u751a\u81f3\u5728\u81ea\u7136\u8a9e\u8a00\u65b9\u9762\uff0c\u6b78\u529f\u65bc\u65b0\u7684\u6a21\u578b\u67b6\u69cb\u8207\u786c\u9ad4\u8cc7\u6e90\u7684\u9032\u6b65\uff0c\u4f7f\u5f97\u81ea\u7136\u8a9e\u8a00\u6709\u91cd\u5927\u7684\u7a81\u7834\u3002\u770b\u5230\u9019\u9ebc\u591a AI \u7684\u7f8e\u597d\u8b93\u5927\u5bb6\u518d\u6b21\u5c0d\u6df1\u5ea6\u5b78\u7fd2\u9ede\u71c3\u5e0c\u671b\uff01\u53ea\u4e0d\u904e AI \u4e26\u975e\u842c\u80fd\uff0c\u5207\u8a18\uff01\u6240\u6709\u7684\u554f\u984c\u4e26\u4e0d\u662f\u5c07\u8cc7\u6599\u6536\u96c6\u597d\uff0c\u4e26\u5c07\u8cc7\u6599\u4e1f\u7d66\u96fb\u8166\u5b78\u7fd2\u5c31\u6703\u5f97\u5230\u4f60\u60f3\u8981\u7684\u7d50\u679c\u3002\u5927\u5bb6\u4e5f\u8a31\u6703\u9677\u5165\u300c\u70ba AI \u800c AI\u300d \u7684\u8ff7\u601d\uff0c\u5f88\u591a\u7684\u4efb\u52d9\u5176\u5be6\u900f\u904e\u5177\u6709\u898f\u5247\u7684\u5c08\u5bb6\u7cfb\u7d71\u6216\u662f\u50b3\u7d71\u6f14\u7b97\u6cd5\u5c31\u53ef\u4ee5\u9054\u5230\u5f88\u4e0d\u932f\u7684\u7d50\u679c\u3002\u518d\u8005\u6211\u5011\u90fd\u5c0d AI \u7684\u6280\u8853\u611f\u5230\u7279\u5225\u6b61\u559c\u8207\u671f\u5f85\uff0c\u4f46\u662f AI \u7684\u9ed1\u76d2\u5b50\u4eba\u985e\u5f80\u5f80\u4e0d\u77e5\u9053\u6a21\u578b\u4e0b\u4e00\u6b65\u6703\u7522\u751f\u4ec0\u9ebc\u4e0d\u53ef\u9810\u671f\u7684\u7d50\u679c\u3002\u5176\u5be6 AI 
\u6709\u5f88\u591a\u7684\u9650\u5236\u8207\u6311\u6230\uff0c\u9664\u4e86\u5efa\u7acb\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u4ee5\u5916\uff0c\u6211\u5011\u66f4\u9700\u8981\u95dc\u6ce8\u7684\u662f\u6a21\u578b\u5728\u60f3\u4ec0\u9ebc\u3002\u53ef\u89e3\u91cb\u4eba\u5de5\u667a\u6167\u5fc5\u7136\u662f\u6211\u5011\u8981\u63a2\u8a0e\u7684\u4e00\u6bb5\u8ab2\u984c\u3002AI \u8207\u6a5f\u5668\u4eba\u7684\u51fa\u73fe\u4e26\u4e0d\u662f\u8981\u53d6\u4ee3\u4eba\u985e\uff0c\u6211\u8a8d\u70ba AI \u6bd4\u8f03\u9069\u5408\u626e\u6f14\u8f14\u52a9\u4eba\u985e\u7684\u91cd\u8981\u89d2\u8272\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 27] \u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#day-27","text":"","title":"[Day 27] \u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#_1","text":"\u63a2\u8a0e\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u7684\u5341\u500b\u932f\u8aa4","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#_2","text":"\u4eba\u5de5\u667a\u6167\u8fd1\u5e74\u4f86\u6210\u70ba\u4efb\u4f55\u7522\u696d\u71b1\u9580\u7684\u8a71\u984c\u4e4b\u4e00\uff0c\u5404\u516c\u53f8\u7a4d\u6975\u5730\u5c0e\u5165\u6a5f\u5668\u5b78\u7fd2\u6280\u8853\u5354\u52a9\u7522\u696d AI \u5316\u3002\u4f8b\u5982\uff1a\u667a\u6167\u91ab\u7642\u3001\u667a\u6167\u4ea4\u901a\u3001\u667a\u6167\u88fd\u9020......\u7b49\u3002\u6b63\u662f\u56e0\u70ba AI \u6280\u8853\u7684\u5275\u65b0\u8207\u666e\u53ca\uff0c\u8a13\u7df4\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u518d\u4e5f\u4e0d\u662f\u7406\u5de5\u80cc\u666f\u7684\u4eba\u624d\u80fd\u505a\u7684\u4e8b\u3002\u6b64\u5916\u96a8\u8457 Python 
\u958b\u767c\u793e\u7fa4\u8301\u58ef\uff0c\u8a31\u591a\u958b\u6e90\u7684 AI \u5957\u4ef6\u5982\u96e8\u5f8c\u6625\u7b4d\u822c\u7684\u51fa\u73fe\u5927\u5927\u964d\u4f4e\u4e86\u6a5f\u5668\u5b78\u7fd2\u5efa\u6a21\u7684\u9580\u6abb\u3002\u5728\u4eca\u5929\u7684\u5167\u5bb9\u4e2d\u6211\u60f3\u85c9\u7531\u9435\u4eba\u8cfd\u4f86\u8ddf\u5927\u5bb6\u5206\u4eab\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b\uff0c\u4e26\u4e14\u5f9e\u8cc7\u6599\u9762\u8207\u6a21\u578b\u9762\u7684\u89d2\u5ea6\u4f86\u63a2\u8a0e\u6a5f\u5668\u5b78\u7fd2\u61c9\u8a72\u6ce8\u610f\u7684\u5e7e\u4ef6\u4e8b\u3002\u5c24\u5176\u662f\u5728\u521d\u5b78\u968e\u6bb5\uff0c\u56e0\u7f3a\u4e4f\u7d93\u9a57\u5f80\u5f80\u6703\u72af\u4e00\u4e9b\u7121\u53ef\u907f\u514d\u7684\u932f\u8aa4\u3002\u6240\u4ee5\u9019\u7bc7\u6587\u7ae0\u5c07\u9ede\u51fa\u5341\u500b\u6a5f\u5668\u5b78\u7fd2\u4e2d\u5e38\u72af\u7684\u96b1\u5f62\u932f\u8aa4\u3002 \u8cc7\u6599\u9762 \u8cc7\u6599\u6536\u96c6\u8207\u8655\u7406\u4e0d\u7576 \u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u985e\u5225\u5206\u4f48\u4e0d\u4e00\u81f4 \u6c92\u6709\u8cc7\u6599\u8996\u89ba\u5316\u7684\u7fd2\u6163 \u4f7f\u7528 LabelEncoder \u70ba\u7279\u5fb5\u7de8\u78bc \u8cc7\u6599\u8655\u7406\u4e0d\u7576\u5c0e\u81f4\u8cc7\u6599\u6d29\u6f0f \u6a21\u578b\u9762 \u50c5\u4f7f\u7528\u6e2c\u8a66\u96c6\u8a55\u4f30\u6a21\u578b\u597d\u58de \u5728\u6c92\u6709\u4ea4\u53c9\u9a57\u8b49\u7684\u60c5\u6cc1\u4e0b\u5224\u65b7\u6a21\u578b\u6027\u80fd \u5206\u985e\u554f\u984c\u50c5\u4f7f\u7528\u6e96\u78ba\u7387\u4f5c\u70ba\u8861\u91cf\u6a21\u578b\u7684\u6307\u6a19 \u8ff4\u6b78\u554f\u984c\u50c5\u4f7f\u7528 R2 \u5206\u6578\u8a55\u4f30\u6a21\u578b\u597d\u58de \u4efb\u4f55\u4e8b\u60c5\u5225\u6025\u8457\u60f3\u7528 AI 
\u89e3\u6c7a","title":"\u524d\u8a00"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#1","text":"\u6a5f\u5668\u5b78\u7fd2\u9996\u8981\u7684\u6b65\u9a5f\u662f\u5b9a\u7fa9\u554f\u984c\uff0c\u7576\u78ba\u5b9a\u76ee\u6a19\u8207\u65b9\u5411\u5f8c\u5373\u53ef\u958b\u59cb\u641c\u96c6\u8cc7\u6599\u3002\u76f8\u4fe1\u5927\u5bb6\u90fd\u77e5\u9053\u73fe\u5be6\u751f\u6d3b\u4e2d\u7684\u8cc7\u6599\u5f97\u4f86\u4e0d\u6613\uff0c\u5373\u4f7f\u5f9e\u8cc7\u6599\u5eab\u53d6\u5f97\u4e86\u9019\u4e9b\u8cc7\u6599\u5f8c\u6211\u5011\u9084\u9700\u8981\u82b1\u5927\u91cf\u7684\u6642\u9593\u9032\u884c\u8cc7\u6599\u6e05\u6d17\u3002\u6240\u8b02\u7684\u8cc7\u6599\u6e05\u6d17\u662f\u8cc7\u6599\u5eab\u7576\u4e2d\u53ef\u80fd\u6703\u6709\u7f3a\u5931\u503c\uff0c\u4f8b\u5982\uff1aNA\u3001Inf\u3001NaN\u3001NULL\u3002 NA\uff1a\u8868\u793a\u7f3a\u5931\u503c\uff0c\u662f Not Available \u7684\u7e2e\u5beb\u3002 Inf\uff1a\u8868\u793a\u7121\u7aae\u5927\uff0c\u662f Infinite \u7684\u7e2e\u5beb\u3002 NaN\uff1a\u8868\u793a\u975e\u6578\u503c\uff0c\u662f Not a Number \u7684\u7e2e\u5beb\u3002 NULL\uff1a\u8868\u793a\u7a7a\u503c\uff0c\u5373\u6c92\u6709\u5167\u5bb9\u3002 \u7576\u8cc7\u6599\u90fd\u5b8c\u6210\u4e86\u524d\u8655\u7406\u5f8c\uff0c\u5373\u53ef\u958b\u59cb\u5efa\u7acb\u6a21\u578b\u8207\u8a55\u4f30\u6a21\u578b\u3002\u4f46\u662f\u7576\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u8868\u73fe\u4e0d\u597d\u6709\u5f88\u591a\u7684\u56e0\u7d20\u3002\u5927\u5bb6\u6700\u5e38\u505a\u7684\u662f\u66ff\u63db\u6a21\u578b\u6f14\u7b97\u6cd5\uff0c\u6216\u662f\u5617\u8a66\u4e0d\u540c\u7684\u6a21\u578b\u8d85\u53c3\u6578\u53d6\u5f97\u4e00\u500b\u6700\u4f73\u7684\u7d50\u679c\u3002\u4f46\u662f\u5728\u9032\u884c\u9019\u4e9b\u505a\u4e4b\u524d\uff0c\u5efa\u8b70\u5927\u5bb6\u5148\u628a\u95dc\u6ce8\u7684\u9ede\u56de\u5230\u8cc7\u6599\u8655\u7406\u9762\u3002\u6a21\u578b\u8a13\u7df4\u4e0d\u597d\u7684\u5176\u4e2d\u4e00\u500b\u56e0\u7d20\u662f\u8cc7\u6599\u7684\u6a19\u7c64\u6536\u96c6\u4e0d\u7576\u3002Landing.ai 
\u57f7\u884c\u9577\u5433\u6069\u9054\u4e5f\u66fe\u7d93\u8aaa\u904e\u7576\u4e00\u500b\u5c0f\u8cc7\u6599\u96c6\u5b58\u5728\u8457\u932f\u8aa4\u6a19\u7c64\u6642\uff0c\u6a21\u578b\u5f88\u96e3\u7d66\u51fa\u4e00\u500b\u6b63\u78ba\u7684\u8f38\u51fa\u3002\u56e0\u70ba\u8cc7\u6599\u9593\u593e\u5e36\u4e86\u96dc\u8a0a\u5f80\u5f80\u6703\u4f7f\u7684\u6a21\u578b\u5b58\u5728\u8457\u4e00\u4e9b\u504f\u5dee\uff0c\u5c0e\u81f4\u8a13\u7df4\u7d50\u679c\u4e0d\u7a69\u5b9a\u3002\u56e0\u6b64\u7b46\u8005\u5efa\u8b70\u6a21\u578b\u8a13\u7df4\u4e0d\u597d\u7684\u6642\u5019\uff0c\u53ef\u4ee5\u56de\u982d\u89c0\u5bdf\u8cc7\u6599\u662f\u5426\u5b58\u5728\u4e00\u4e9b\u932f\u8aa4\u3002\u800c\u4e0d\u662f\u4e00\u6627\u7684\u8abf\u6574\u6a21\u578b\u6f14\u7b97\u6cd5\u8207\u8d85\u53c3\u6578\u3002","title":"1. \u8cc7\u6599\u6536\u96c6\u8207\u8655\u7406\u4e0d\u7576"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#2","text":"\u5728\u5206\u985e\u7684\u8cc7\u6599\u4e2d\uff0c\u521d\u5b78\u8005\u5e38\u898b\u7684\u932f\u8aa4\u662f\u5fd8\u8a18\u4f7f\u7528\u5206\u5c64\u62bd\u6a23 (stratify) \u4f86\u5c0d\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u9032\u884c\u5207\u5272\u3002\u7576\u6e2c\u8a66\u96c6\u7684\u5206\u4f48\u76e1\u53ef\u80fd\u8207\u8a13\u7df4\u76f8\u540c\u60c5\u6cc1\u4e0b\uff0c\u6a21\u578b\u624d\u66f4\u6709\u53ef\u80fd\u5f97\u5230\u66f4\u6e96\u78ba\u7684\u9810\u6e2c\u3002\u7136\u800c\u5728\u5206\u985e\u7684\u554f\u984c\u4e2d\uff0c\u6211\u5011\u5fc5\u9808\u66f4\u95dc\u6ce8\u6bcf\u500b\u985e\u5225\u7684\u8cc7\u6599\u5206\u4f48\u6bd4\u4f8b\u3002\u4ee5\u4e0b\u8209\u500b\u4f8b\u5b50\uff1a\u5047\u8a2d\u6211\u5011\u6709\u4e09\u500b\u6a19\u7c64\u7684\u985e\u5225\uff0c\u800c\u9019\u4e09\u500b\u985e\u5225\u7684\u5206\u4f48\u6bd4\u4f8b\u5206\u5225\u70ba 
4:3:3\u3002\u540c\u7406\u6211\u5011\u5728\u9032\u884c\u8cc7\u6599\u5207\u5272\u7684\u6642\u5019\u5fc5\u9808\u78ba\u4fdd\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u9700\u8981\u6709\u76f8\u540c\u7684\u8cc7\u6599\u5206\u4f48\u6bd4\u4f8b\u3002 \u5927\u5bb6\u61c9\u8a72\u90fd\u4f7f\u7528\u904e Sklearn \u7684 train_test_split \u9032\u884c\u8cc7\u6599\u5207\u5272\u3002\u5728\u6b64\u65b9\u6cd5\u4e2d Sklearn \u63d0\u4f9b\u4e86\u4e00\u500b stratify \u53c3\u6578\u9054\u5230\u5206\u5c64\u96a8\u6a5f\u62bd\u6a23\u7684\u76ee\u7684\u3002\u7279\u5225\u662f\u5728\u539f\u59cb\u6578\u64da\u4e2d\u6a23\u672c\u6a19\u7c64\u5206\u4f48\u4e0d\u5747\u8861\u6642\u975e\u5e38\u6709\u7528\uff0c\u4e00\u4e9b\u5206\u985e\u554f\u984c\u53ef\u80fd\u6703\u5728\u76ee\u6a19\u985e\u7684\u5206\u4f48\u4e2d\u8868\u73fe\u51fa\u5f88\u5927\u7684\u4e0d\u5e73\u8861\uff1a\u4f8b\u5982\uff0c\u8ca0\u6a23\u672c\u8207\u6b63\u6a23\u672c\u6bd4\u4f8b\u61f8\u6b8a(\u4fe1\u7528\u5361\u76dc\u5237\u9810\u6e2c\u3001\u96e2\u8077\u54e1\u5de5\u9810\u6e2c)\u3002\u4ee5\u4e0b\u7528\u7d05\u9152\u5206\u985e\u9810\u6e2c\u4f86\u9032\u884c\u793a\u7bc4\uff0c\u9996\u5148\u6211\u5011\u4e0d\u4f7f\u7528 stratify \u96a8\u6a5f\u5207\u5272\u8cc7\u6599\u4e26\u67e5\u770b\u8cc7\u6599\u5207\u5272\u524d\u5f8c\u7684\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b\u3002 from sklearn.datasets import load_wine from sklearn.model_selection import train_test_split X , y = load_wine ( return_X_y = True ) # \u67e5\u770b\u5168\u90e8\u8cc7\u6599\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y ) . value_counts ( normalize = True ) # \u5168\u90e8\u8cc7\u6599\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.398876 0 0.331461 2 0.269663 dtype: float64 # \u5be6\u9a57\u4e00: \u4e0d\u4f7f\u7528 stratify \u9032\u884c\u5207\u5272\u8cc7\u6599 X_train , X_test , y_train , y_test = train_test_split ( X , y ) # \u67e5\u770b\u8a13\u7df4\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y_train ) . 
value_counts ( normalize = True ) # \u67e5\u770b\u6e2c\u8a66\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y_test ) . value_counts ( normalize = True ) # \u8a13\u7df4\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.390977 0 0.330827 2 0.278195 dtype: float64 # \u6e2c\u8a66\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.511111 0 0.266667 2 0.222222 dtype: float64 \u5f9e\u4e0a\u9762\u5207\u51fa\u4f86\u7684\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u53ef\u4ee5\u767c\u73fe\u4e09\u500b\u985e\u5225\u7684\u8cc7\u6599\u5206\u4f48\u6bd4\u4f8b\u90fd\u4e0d\u540c\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u4f7f\u7528 stratify \u53c3\u6578\u518d\u5207\u5272\u4e00\u6b21\u3002 # \u5be6\u9a57\u4e8c: \u4f7f\u7528 stratify \u9032\u884c\u5207\u5272\u8cc7\u6599 X_train , X_test , y_train , y_test = train_test_split ( X , y , stratify = y ) # \u67e5\u770b\u8a13\u7df4\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y_train ) . value_counts ( normalize = True ) # \u67e5\u770b\u6e2c\u8a66\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y_test ) . value_counts ( normalize = True ) # \u8a13\u7df4\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.400000 0 0.333333 2 0.266667 dtype: float64 # \u6e2c\u8a66\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.398496 0 0.330827 2 0.270677 dtype: float64 \u6211\u5011\u53ef\u4ee5\u767c\u73fe\u5c07 stratify \u8a2d\u7f6e\u70ba\u76ee\u6a19 (y) \u5728\u8a13\u7df4\u548c\u6e2c\u8a66\u96c6\u4e2d\u7522\u751f\u76f8\u540c\u7684\u5206\u4f48\u3002\u56e0\u70ba\u6539\u8b8a\u7684\u985e\u5225\u7684\u6bd4\u4f8b\u662f\u4e00\u500b\u56b4\u91cd\u7684\u554f\u984c\uff0c\u53ef\u80fd\u6703\u4f7f\u6a21\u578b\u66f4\u504f\u5411\u65bc\u7279\u5b9a\u7684\u985e\u5225\u3002\u56e0\u6b64\u8a13\u7df4\u8cc7\u6599\u7684\u5206\u4f48\u5fc5\u9808\u8981\u8207\u5be6\u969b\u60c5\u6cc1\u8d8a\u63a5\u8fd1\u8d8a\u597d\u3002","title":"2. 
\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u985e\u5225\u5206\u4f48\u4e0d\u4e00\u81f4"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#3","text":"\u8cc7\u6599\u8996\u89ba\u5316\u7684\u597d\u8655\u591a\u591a\uff0c\u5728\u672c\u7cfb\u5217\u6587\u7ae0 [Day 3] \u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\uff1f\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427\uff01 \u8207 [Day 22] Python \u8996\u89ba\u5316\u89e3\u91cb\u6578\u64da - Plotly Express \u8b1b\u89e3\u4e86\u8a31\u591a Python \u8cc7\u6599\u8996\u89ba\u5316\u7684\u6280\u5de7\u3002\u8cc7\u6599\u8996\u89ba\u5316\u53ef\u4ee5\u5e6b\u52a9\u6211\u5011\u5206\u6790\u8207\u7d71\u8a08\u8cc7\u6599\u7684\u578b\u614b\uff0c\u5f80\u5f80\u6709\u597d\u7684\u8cc7\u6599\u6e05\u6d17\u8207\u524d\u8655\u7406\u5c0d\u6a21\u578b\u9810\u6e2c\u7d50\u679c\u6703\u6709\u5927\u5e45\u7684\u63d0\u5347\u3002\u6709\u8208\u8da3\u7684\u8b80\u8005\u53ef\u4ee5\u53c3\u8003 \u5b89\u65af\u5eab\u59c6\u56db\u91cd (Anscombe\u2019s quartet) \u3002\u4ed6\u4e3b\u8981\u662f\u662f\u900f\u904e\u56db\u500b\u5c0f\u8cc7\u6599\u96c6\u4e26\u900f\u904e\u8996\u89ba\u5316\u8207\u7d71\u8a08\u4f86\u89c0\u5bdf\uff0c\u4e26\u8aaa\u660e\u5728\u5206\u6790\u6578\u64da\u524d\u5148\u7e6a\u88fd\u5716\u8868\u7684\u91cd\u8981\u6027\uff0c\u4ee5\u53ca\u96e2\u7fa4\u503c\u5c0d\u7d71\u8a08\u7684\u5f71\u97ff\u4e4b\u5927\u3002","title":"3. 
\u6c92\u6709\u8cc7\u6599\u8996\u89ba\u5316\u7684\u7fd2\u6163"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#4-labelencoder","text":"\u901a\u5e38\u6211\u5011\u8981\u70ba\u985e\u5225\u7684\u7279\u5fb5\u9032\u884c\u7de8\u78bc\uff0c\u76f4\u89ba\u6703\u60f3\u5230 Sklearn \u7684 LabelEncoder \u3002\u4f46\u662f\u5982\u679c\u4e00\u500b\u8cc7\u6599\u96c6\u4e2d\u6709\u591a\u500b\u7279\u5fb5\u662f\u5c6c\u65bc\u985e\u5225\u578b\u7684\u8cc7\u6599\uff0c\u8c48\u4e0d\u662f\u5f88\u9ebb\u7169?\u5fc5\u9808\u8981\u4e00\u500b\u4e00\u500b\u547c\u53eb LabelEncoder \u5206\u5225\u70ba\u9019\u4e9b\u7279\u5fb5\u9032\u884c\u8f49\u63db\u3002\u5982\u679c\u4f60\u770b\u5230\u9019\u908a\u6709\u540c\u611f\u7684\uff0c\u5728\u9019\u88e1\u8981\u544a\u8a34\u4f60\u4e8b\u5be6\u4e26\u975e\u5982\u6b64\uff01\u6211\u5011\u770b\u770b \u5728\u5b98\u65b9\u6587\u4ef6\u4e0b LabelEncoder \u7684\u63cf\u8ff0\uff1a This transformer should be used to encode target values, i.e. y, and not the input X. \u7c21\u55ae\u4f86\u8aaa LabelEncoder \u53ea\u662f\u88ab\u7528\u4f86\u7de8\u78bc\u8f38\u51fa\u9805 y \u800c\u5df2\u7684\uff01\u4f60\u9084\u5728\u7528\u5b83\u4f86\u7de8\u78bc\u4f60\u7684\u6bcf\u500b x \u55ce\uff1f\uff08\u6688 \u90a3\u9ebc\u6211\u5011\u8a72\u7528\u4ec0\u9ebc\u65b9\u6cd5\u4f86\u7de8\u78bc\u6709\u9806\u5e8f\u7684\u985e\u5225\u7279\u5fb5\u5462\uff1f\u5982\u679c\u4f60\u4ed4\u7d30\u95b1\u8b80\u6709\u95dc\u7de8\u78bc\u5206\u985e\u7279\u5fb5\u7684 Sklearn \u7528\u6236\u6307\u5357\uff0c\u4f60\u6703\u770b\u5230\u5b83\u6e05\u695a\u5730\u8aaa\u660e\uff1a To convert categorical features to integer codes, we can use the OrdinalEncoder. 
This estimator transforms each categorical feature to one new feature of integers (0 to n_categories - 1) \u770b\u5230\u9019\u908a\u5927\u5bb6\u61c9\u8a72\u77e5\u9053\u95b1\u8b80\u5b98\u65b9\u6587\u4ef6\u7684\u91cd\u8981\u6027\u5427\uff01\u5b98\u65b9\u6587\u4ef6\u4e2d\u5efa\u8b70 x \u9805\u7684\u8f38\u5165\u7279\u5fb5\u53ef\u4ee5\u63a1\u7528 OrdinalEncoder \u4e00\u6b21\u70ba\u6240\u6709\u7279\u5fb5\u4f9d\u5e8f\u505a Label Encoding\u3002OrdinalEncoder \u7de8\u78bc\u5668\u7684\u4f7f\u7528\u65b9\u5f0f\u5982\u4e0b\uff1a from sklearn.preprocessing import OrdinalEncoder enc = OrdinalEncoder () X = [[ 'Male' , 1 ], [ 'Female' , 3 ], [ 'Female' , 2 ]] enc . fit ( X ) print ( enc . categories_ ) enc . transform ([[ 'Female' , 3 ], [ 'Male' , 1 ]]) [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)] array([[0., 2.], [1., 0.]]) \u4ee5\u4e0a\u7684\u7bc4\u4f8b\u662f X \u6709\u4e09\u7b46\u8cc7\u6599\uff0c\u6bcf\u7b46\u8cc7\u6599\u90fd\u6709\u5169\u500b\u7279\u5fb5\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7b2c\u4e00\u500b\u7279\u5fb5\u662f\u6027\u5225 Male \u8207 Female\uff0c\u56e0\u6b64 OrdinalEncoder \u6703\u4f9d\u9020\u5b57\u6bcd\u958b\u982d\u505a\u6392\u5e8f Female \u7de8\u78bc\u70ba 0 \u800c Male \u7de8\u78bc\u70ba 1\u3002\u53e6\u5916\u7b2c\u4e8c\u500b\u7279\u5fb5\u70ba\u6578\u5b57 1\u30012\u30013\uff0c\u540c\u7406\u4f9d\u5e8f\u70ba\u4ed6\u5011\u7de8\u78bc\u6210 0\u30011\u30012\u3002\u53ea\u9700\u95b1\u8b80\u5b98\u65b9\u6587\u6a94\u548c\u7528\u6236\u6307\u5357\uff0c\u4f60\u5c31\u53ef\u4ee5\u4e86\u89e3\u5f88\u591a\u95dc\u65bc Sklearn \u7684\u77e5\u8b58\uff01\u662f\u4e0d\u662f\u5f88\u68d2\uff5e","title":"4. 
\u4f7f\u7528 LabelEncoder \u70ba\u7279\u5fb5\u7de8\u78bc"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#5","text":"\u8cc7\u6599\u6d29\u6f0f (data leakage) \u662f\u500b\u96b1\u5f62\u6bba\u624b\uff0c\u5b83\u6703\u5728\u4e0d\u77e5\u4e0d\u89ba\u4e2d\u5f71\u97ff\u6a21\u578b\u9810\u6e2c\u7d50\u679c\u3002\u5176\u767c\u751f\u7684\u6642\u6a5f\u5728\u65bc\u4f60\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\uff0c\u4e0d\u61c9\u8a72\u5c07\u6e2c\u8a66\u7684\u8cc7\u6599\u7684\u8cc7\u8a0a\u6d29\u6f0f\u5230\u8a13\u7df4\u904e\u7a0b\u4e2d\u3002\u5b83\u6703\u9020\u6210\u6a21\u578b\u7d66\u51fa\u4e00\u500b\u975e\u5e38\u6a02\u89c0\u7684\u7d50\u679c\uff0c\u5373\u4f7f\u5728\u4ea4\u53c9\u9a57\u8b49\u4e2d\u4e5f\u662f\u5982\u6b64\uff0c\u4f46\u5728\u5c0d\u5be6\u969b\u65b0\u6578\u64da\u9032\u884c\u6e2c\u8a66\u6642\u8868\u73fe\u6703\u975e\u5e38\u5730\u7cdf\u7cd5\u3002 \u8cc7\u6599\u6d29\u6f0f\u6700\u5e38\u767c\u751f\u65bc\u8cc7\u6599\u524d\u8655\u7406\u7684\u968e\u6bb5\uff0c\u5c24\u5176\u662f\u7576\u4f60\u7684\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u5c1a\u672a\u5207\u5272\u7684\u6642\u5019\u3002Sklearn \u63d0\u4f9b\u4e86\u8a31\u591a\u8cc7\u6599\u524d\u8655\u7406\u7684\u65b9\u6cd5\uff0c\u4f8b\u5982: \u7f3a\u5931\u503c\u88dc\u503c(imputers)\u3001\u6b63\u898f\u5316 (normalizers)\u3001\u6a19\u6e96\u5316(standardization)\u4ee5\u53ca\u5c0d\u6578(log) \u8f49\u63db...\u7b49\u3002\u9019\u4e9b\u8f49\u63db\u5668\u90fd\u6703\u4f9d\u8cf4\u65bc\u4f60\u8f38\u5165\u8cc7\u6599\u7684\u5206\u4f48\uff0c\u4e26\u4f9d\u7167\u6b64\u5206\u4f48\u505a\u76f8\u5c0d\u61c9\u7684\u64ec\u5408\u3002 \u8209\u4f8b\u4f86\u8aaa\uff0c\u6211\u5011\u5728\u505a\u6a19\u6e96\u5316\u6642(StandardScaler)\u900f\u904e\u5f9e\u6bcf\u7b46\u8cc7\u6599\u4e2d\u6e1b\u53bb\u5e73\u5747\u503c\u4e26\u5c07\u5176\u9664\u4ee5\u6a19\u6e96\u504f\u5dee\u4f86\u7372\u5f97\u7e2e\u653e\u5f8c\u7684\u6578\u64da\u3002\u6211\u5011\u4f7f\u7528 fit() \u65b9\u6cd5\u5728\u6240\u6709\u8cc7\u6599\u96c6 X 
\u4e0a\u505a\u8f49\u63db\uff0c\u4e26\u4f7f\u5f97\u8f49\u63db\u5668\u5b78\u7fd2\u6bcf\u500b\u7279\u5fb5\u7684\u6574\u500b\u5206\u4f48\u7684\u5e73\u5747\u503c\u548c\u6a19\u6e96\u5dee\u3002\u9019\u4e9b\u8cc7\u6599\u8f49\u63db\u5f8c\u5982\u679c\u518d\u5c07\u9019\u4e9b\u6578\u64da\u62c6\u5206\u70ba\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\uff0c\u5247\u8a13\u7df4\u96c6\u6703\u53d7\u5230\u6c61\u67d3\u3002\u56e0\u70ba StandardScaler \u5f9e\u5be6\u969b\u5206\u4f48\u4e2d\u6d29\u9732\u4e86\u6e2c\u8a66\u96c6\u91cd\u8981\u8a0a\u606f\uff0c\u4e00\u822c\u4f86\u8aaa\u6211\u5011\u4e0d\u80fd\u5c07\u6e2c\u8a66\u96c6\u7684\u5206\u4f48\u60c5\u6cc1\u8207\u8a13\u7df4\u96c6\u6df7\u5728\u4e00\u8d77\u3002\u96d6\u7136\u6211\u5011\u5e0c\u671b\u8a13\u7df4\u96c6\u7684\u5206\u4f48\u8207\u5be6\u969b\u6e2c\u8a66\u96c6\u7684\u5206\u4f48\u8981\u8d8a\u63a5\u8fd1\u8d8a\u597d\uff0c\u56e0\u70ba\u4f7f\u5f97\u6a21\u578b\u8868\u73fe\u7d50\u679c\u7a69\u5b9a\u3002 \u96d6\u7136\u6211\u5011\u628a\u6e2c\u8a66\u96c6\u8207\u8a13\u7df4\u96c6\u6df7\u5728\u4e00\u8d77\u4e26\u505a\u8f49\u63db\uff0c\u9019\u4e00\u6b65\u9a5f\u5c0d\u6211\u5011\u4f86\u8aaa\u53ef\u80fd\u6c92\u4ec0\u9ebc\u3002\u4f46\u662f\u5c0d\u65bc Sklearn \u5f37\u5927\u7684\u6f14\u7b97\u6cd5\uff0c\u53ef\u80fd\u6703\u900f\u904e\u9019\u500b\u907a\u6f0f\u6e2c\u8a66\u96c6\u7684\u5206\u4f48\u7684\u8a0a\u606f\u628a\u6a21\u578b\u64ec\u5408\u7684\u5f88\u597d\u3002\u5c46\u6642\u6a21\u578b\u8a13\u7df4\u5b8c\u6210\u5f8c\uff0c\u6e2c\u8a66\u96c6\u4e0d\u5920\u65b0\u7a4e\uff0c\u7121\u6cd5\u5728\u5be6\u969b\u770b\u4e0d\u898b\u7684\u6578\u64da\u4e0a\u6e2c\u8a66\u6a21\u578b\u7684\u6027\u80fd\u3002 \u6700\u7c21\u55ae\u7684\u89e3\u6c7a\u8fa6\u6cd5\uff0c\u5c31\u662f\u4e0d\u8981\u4f7f\u7528 fit() 
\u4e00\u6b21\u8f49\u63db\u6240\u6709\u7684\u8cc7\u6599\u3002\u5728\u505a\u4efb\u4f55\u8cc7\u6599\u8f49\u63db\u4e4b\u524d\u8981\u5148\u78ba\u4fdd\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u5df2\u7d93\u5b8c\u6574\u5730\u88ab\u5207\u958b\u3002\u5373\u4f7f\u5207\u958b\u5f8c\u4e5f\u4e0d\u8981\u518d\u62ff\u6e2c\u8a66\u96c6\u547c\u53eb fit() \u6216 fit_transform() \uff0c\u9019\u4e00\u6a23\u6703\u5c0e\u81f4\u76f8\u540c\u554f\u984c\u767c\u751f\u3002\u56e0\u70ba\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u5fc5\u9808\u9032\u884c\u76f8\u540c\u7684\u8f49\u63db\uff0c\u4f9d\u7167\u5b98\u65b9\u7684\u7bc4\u4f8b\u6211\u5011\u5fc5\u9808\u5148\u4f7f\u7528 fit_transform() \u5728\u8a13\u7df4\u96c6\u4e0a\u9032\u884c\u64ec\u5408\u8207\u8f49\u63db\u3002\u9019\u78ba\u4fdd\u4e86\u8f49\u63db\u5668\u50c5\u5f9e\u8a13\u7df4\u96c6\u5b78\u7fd2\uff0c\u5f9e\u4e2d\u627e\u51fa\u53c3\u6578\u4f8b\u5982\u5e73\u5747\u503c\u8207\u8b8a\u7570\u6578\u4e26\u540c\u6642\u5c0d\u5176\u9032\u884c\u8b8a\u63db\u3002\u63a5\u8457\u4f7f\u7528 transform() \u65b9\u6cd5\u5728\u6e2c\u8a66\u8cc7\u6599\u4e0a\u9032\u884c\u8f49\u63db\uff0c\u6839\u64da\u5f9e\u8a13\u7df4\u6578\u64da\u4e2d\u5b78\u5230\u7684\u8a0a\u606f\u9032\u884c\u8f49\u63db\u3002 from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler X , y = load_iris ( return_X_y = True ) X_train , X_test , y_train , y_test = train_test_split ( X , y , stratify = y , random_state = 44 ) scaler = StandardScaler () X_train_scaled = scaler . fit_transform ( X_train ) X_test_scaled = scaler . 
transform ( X_test ) \u66f4\u5f37\u5927\u7684\u89e3\u6c7a\u65b9\u6848\u662f\u4f7f\u7528 Sklearn \u5167\u5efa\u7684 pipeline\uff0c\u5b83\u80fd\u5920\u4fdd\u8b77\u6a21\u578b\u514d\u65bc\u8cc7\u6599\u6d29\u6f0f\u7684\u554f\u984c\u3002\u6b64\u65b9\u6cd5\u80fd\u5920\u78ba\u4fdd\u8a13\u7df4\u8cc7\u6599\u50c5\u53c3\u8207\u8f49\u63db\u64ec\u5408\u8207\u6a21\u578b\u8a13\u7df4\uff0c\u800c\u6e2c\u8a66\u8cc7\u6599\u50c5\u7528\u65bc\u8a08\u7b97\u4e26\u9a57\u8b49\u6a21\u578b\u3002","title":"5. \u8cc7\u6599\u8655\u7406\u4e0d\u7576\u5c0e\u81f4\u8cc7\u6599\u6d29\u6f0f"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#6","text":"\u5982\u679c\u4f60\u7684\u6e2c\u8a66\u8cc7\u6599 R2 score \u5f97\u5230\u4e86 0.85 \u5c31\u4ee3\u8868\u5f88\u597d\u4e86\u55ce\uff1f\u4e0d\u76e1\u7136\uff01\u5118\u7ba1\u6709\u9ad8\u7684\u6e2c\u8a66\u5206\u6578\u901a\u5e38\u610f\u5473\u8457\u6a21\u578b\u8868\u73fe\u4f73\uff0c\u4f46\u5728\u89e3\u91cb\u6e2c\u8a66\u7d50\u679c\u6642\u4ecd\u6709\u4e00\u4e9b\u91cd\u8981\u7684\u6ce8\u610f\u4e8b\u9805\u3002\u9996\u5148\u6700\u91cd\u8981\u7684\uff0c\u7121\u8ad6\u5206\u6578\u503c\u5982\u4f55\u6e2c\u8a66\u96c6\u7684\u5206\u6578\u4e00\u5b9a\u8981\u8207\u8a13\u7df4\u96c6\u76f8\u6bd4\u8f03\u624d\u80fd\u78ba\u4fdd\u6a21\u578b\u8a13\u7df4\u597d\u8207\u58de\u3002\u7576\u4f60\u7684\u6a21\u578b\u8a13\u7df4\u96c6\u5206\u6578\u9ad8\u65bc\u6e2c\u8a66\u96c6\u7684\u5206\u6578\uff0c\u4e26\u4e14\u5169\u8005\u90fd\u8db3\u5920\u9ad8\u4ee5\u6eff\u8db3\u5c08\u6848\u7684\u76ee\u6a19\u671f\u671b\u6642\u9019\u4ee3\u8868\u4f60\u8a13\u7df4\u4e86\u4e00\u500b\u597d\u6a21\u578b\u3002\u7136\u800c\u9019\u4e26\u4e0d\u610f\u5473\u8457\u8a13\u7df4\u548c\u6e2c\u8a66\u5206\u6578\u4e4b\u9593\u7684\u5dee\u7570\u8d8a\u5927\u8d8a\u597d\u3002\u8209\u500b\u4f8b\u5b50\uff0c\u82e5\u8a13\u7df4\u96c6\u7684 R2 score \u70ba 0.85 \u6e2c\u8a66\u96c6\u70ba 0.8 
\u5373\u4ee3\u8868\u6a21\u578b\u65e2\u4e0d\u904e\u5ea6\u64ec\u5408(overfit)\u4e5f\u4e0d\u6b20\u64ec\u5408(underfit)\u3002\u4f46\u662f\u5982\u679c\u8a13\u7df4\u96c6 0.9 \u6e2c\u8a66\u96c6 0.8 \u7684\u6642\u5019\uff0c\u4f60\u7684\u6a21\u578b\u5c31\u662f\u904e\u64ec\u5408\u3002\u5176\u539f\u56e0\u662f\u8a72\u6a21\u578b\u6c92\u6709\u5728\u8a13\u7df4\u671f\u9593\u9032\u884c\u6cdb\u5316\uff0c\u800c\u662f\u8a18\u4f4f\u4e86\u4e00\u4e9b\u8a13\u7df4\u6578\u64da\uff0c\u5f9e\u800c\u5c0e\u81f4\u6e2c\u8a66\u5206\u6578\u4f4e\u5f97\u591a\u3002 \u5728\u5927\u591a\u6578\u4efb\u52d9\u4e2d\u4f60\u5c07\u6703\u770b\u5230\u8a31\u591a\u4eba\u4f7f\u7528 tree-based \u6a21\u578b\u6216\u662f\u6574\u9ad4\u5b78\u7fd2\u6a21\u578b (ensemble models)\u3002\u4f8b\u5982\u5728\u96a8\u6a5f\u68ee\u6797\u6f14\u7b97\u6cd5\u7576\u4e2d\u5982\u679c\u5b83\u5011\u7684\u6a39\u6df1\u5ea6\u592a\u6df1\uff0c\u5f80\u5f80\u6703\u7372\u5f97\u975e\u5e38\u9ad8\u7684\u8a13\u7df4\u5206\u6578\uff0c\u5f9e\u800c\u5c0e\u81f4\u904e\u5ea6\u64ec\u5408\u3002\u53e6\u5916\u4e5f\u6709\u6e2c\u8a66\u96c6\u7684\u5206\u6578\u6bd4\u8a13\u7df4\u96c6\u9ad8\u7684\u60c5\u6cc1\uff0c\u82e5\u767c\u751f\u6b64\u60c5\u6cc1\u6642\u901a\u5e38\u90fd\u6703\u611f\u89ba\u662f\u4e0d\u662f\u505a\u932f\u4e86\u4ec0\u9ebc\u3002\u9019\u7a2e\u60c5\u6cc1\u7684\u4e3b\u8981\u539f\u56e0\u662f\u8cc7\u6599\u6d29\u6f0f\uff0c\u4e5f\u5c31\u662f\u4e0a\u4e00\u7bc0\u6211\u5011\u8a0e\u8ad6\u7684\u60c5\u6cc1\u3002\u6216\u662f\u4f60\u7684\u6e2c\u8a66\u8cc7\u6599\u7b46\u6578\u592a\u5c11\uff0c\u6c92\u8fa6\u6cd5\u8db3\u4ee5\u9a57\u8b49\u6a21\u578b\u597d\u58de\u3002 
\u53e6\u5916\u6709\u6642\u5019\u6211\u5011\u4e5f\u6703\u5f97\u5230\u5728\u8a13\u7df4\u96c6\u6709\u5f88\u597d\u7684\u8868\u73fe\u4f46\u6e2c\u8a66\u96c6\u7121\u6575\u5dee\u7684\u60c5\u6cc1\u3002\u7576\u8a13\u7df4\u548c\u6e2c\u8a66\u5206\u6578\u5dee\u7570\u5f88\u5927\u6642\uff0c\u554f\u984c\u5f80\u5f80\u8207\u6e2c\u8a66\u96c6\u6709\u95dc\u800c\u4e0d\u662f\u904e\u5ea6\u64ec\u5408\u3002\u9019\u6642\u5019\u4f60\u53ef\u80fd\u8981\u6aa2\u67e5\u8cc7\u6599\u9810\u8655\u7406\u7684\u65b9\u5f0f\u662f\u5426\u4e00\u81f4 (\u50cf\u662f\u53d6 log \u6216 scale)\uff0c\u6216\u662f\u53ea\u662f\u5fd8\u8a18\u5c0d\u6e2c\u8a66\u96c6\u505a\u8f49\u63db\u8655\u7406\u3002 \u9019\u88e1\u505a\u4e00\u500b\u5c0f\u7d50\uff0c\u7e3d\u4e4b\u5728\u8a13\u7df4\u597d\u6a21\u578b\u6642\u8acb\u4ed4\u7d30\u6aa2\u67e5\u8a13\u7df4\u548c\u6e2c\u8a66\u5206\u6578\u4e4b\u9593\u7684\u5dee\u8ddd\u3002\u4e26\u4e14\u53ef\u4ee5\u900f\u904e\u6b64\u8a55\u4f30\u65b9\u5f0f\u6aa2\u8996\u6a21\u578b\u662f\u5426\u904e\u64ec\u5408\uff0c\u540c\u6642\u4e5f\u80fd\u9032\u884c\u6a21\u578b\u689d\u53c3\u6216\u662f\u9078\u64c7\u6700\u4f73\u7684\u8cc7\u6599\u9810\u8655\u7406\u65b9\u5f0f\u3002\u4e26\u70ba\u6700\u7d42\u7684\u6a21\u578b\u505a\u6700\u4f73\u7684\u6e96\u5099\u3002","title":"6. 
\u50c5\u4f7f\u7528\u6e2c\u8a66\u96c6\u8a55\u4f30\u6a21\u578b\u597d\u58de"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#7","text":"\u6211\u60f3\u5927\u5bb6\u61c9\u8a72\u90fd\u719f\u7df4\u638c\u63e1\u4e86 overfitting \u9019\u500b\u8b70\u984c\u3002\u9019\u662f\u6a5f\u5668\u5b78\u7fd2\u4e2d\u4e00\u500b\u8feb\u5207\u554f\u984c\uff0c\u4e26\u5df2\u7d93\u8a2d\u8a08\u4e86\u7121\u6578\u500b\u65b9\u6cd5\u4f86\u89e3\u6c7a\u5b83\u3002\u6700\u57fa\u672c\u7684\u65b9\u6cd5\u662f\u5c07\u4e00\u90e8\u5206\u6578\u64da\u4f5c\u70ba\u6e2c\u8a66\u96c6\u4f86\u6a21\u64ec\u548c\u6e2c\u91cf\u6a21\u578b\u5728\u770b\u4e0d\u898b\u7684\u6578\u64da\u4e0a\u7684\u6027\u80fd\u3002\u4f46\u662f\u6211\u5011\u53ef\u4ee5\u8abf\u6574\u6a21\u578b\u7684\u8d85\u53c3\u6578\uff0c\u76f4\u5230\u6a21\u578b\u5728\u8a72\u7279\u5b9a\u6e2c\u8a66\u96c6\u4e0a\u9054\u5230\u6700\u9ad8\u5206\u6578\uff0c\u9019\u53c8\u610f\u5473\u8457\u67d0\u7a2e\u542b\u7fa9\u7684\u904e\u5ea6\u64ec\u5408\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u6703\u5c07\u5b8c\u6574\u6578\u64da\u7684\u53e6\u4e00\u90e8\u5206\u4f5c\u70ba \u9a57\u8b49\u96c6 \u518d\u6b21\u89e3\u6c7a\u9019\u500b\u554f\u984c\u3002\u6a21\u578b\u5c07\u5728\u8a13\u7df4\u6578\u64da\u4e0a\u9032\u884c\u8a13\u7df4\uff0c\u4e26\u5728\u9a57\u8b49\u96c6\u4e0a\u5fae\u8abf\u5176\u53c3\u6578\uff0c\u4e26\u5728\u6e2c\u8a66\u96c6\u4e0a\u9032\u884c\u6700\u7d42\u8a55\u4f30\u3002 \u4f46\u662f\u5c07\u6211\u5011\u5bf6\u8cb4\u7684\u6578\u64da\u5206\u6210\u4e09\u7d44\u610f\u5473\u8457\u6a21\u578b\u53ef\u4ee5\u5b78\u7fd2\u7684\u6578\u64da\u91cf\u66f4\u5c11\u3002\u6b64\u5916\u6a21\u578b\u7684\u6574\u9ad4\u9810\u6e2c\u6027\u80fd\u5c07\u53d6\u6c7a\u65bc\u90a3\u5c0d\u7279\u5b9a\u7684\u8a13\u7df4\u96c6\u548c\u9a57\u8b49\u96c6\u3002\u56e0\u6b64\u5728\u9032\u884c\u6a5f\u5668\u5b78\u7fd2\u6642\u6700\u5e38\u4f7f\u7528 K-Fold cross-validation \u89e3\u6c7a\u4e0a\u8ff0\u554f\u984c\u3002\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u6211\u7684\u524d\u5169\u5929\u6587\u7ae0 
[Day 25] \u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb \u4ee5\u53ca [Day 26] \u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation \u3002\u6839\u64da\u6211\u5011\u8a2d\u5b9a\u7684 K \u503c\uff0c\u53ef\u4ee5\u5b8c\u6574\u7684\u5c07\u6578\u64da\u88ab\u5206\u6210 K \u7d44 folds\uff0c\u5c0d\u65bc\u6bcf\u500b folds \u6bcf\u6b21\u6a21\u578b\u8a13\u7df4\u6703\u628a K-1 \u7d44\u4f5c\u70ba\u8a13\u7df4\u96c6\uff0c\u800c\u5269\u4e0b\u7684\u88ab\u6b78\u985e\u70ba\u9a57\u8b49\u96c6\u3002\u7576\u6a21\u578b\u4ea4\u53c9\u9a57\u8b49\u7d50\u675f\u5f8c\uff0c\u8a13\u7df4\u96c6\u6240\u6709\u8cc7\u6599\u6703\u88ab\u5b8c\u6574\u7684\u8a13\u7df4\u3002","title":"7. \u5728\u6c92\u6709\u4ea4\u53c9\u9a57\u8b49\u7684\u60c5\u6cc1\u4e0b\u5224\u65b7\u6a21\u578b\u6027\u80fd"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#8","text":"\u5728\u9810\u8a2d\u7684\u60c5\u6cc1\u4e0b\u6240\u6709 Sklearn \u5206\u985e\u5668\u5728\u547c\u53eb score() \u51fd\u6578\u6642\u90fd\u4f7f\u7528\u6e96\u78ba\u5ea6\u4f5c\u70ba\u8a55\u5206\u65b9\u6cd5\u3002\u7531\u65bc\u6e96\u78ba\u7387\u7684\u8a08\u7b97\u65b9\u5f0f\u7c21\u55ae\u8207\u5bb9\u6613\u7406\u89e3\uff0c\u56e0\u6b64\u7d93\u5e38\u6703\u770b\u5230\u521d\u5b78\u8005\u5ee3\u6cdb\u4f7f\u7528\u5b83\u4f86\u5224\u65b7\u5176\u6a21\u578b\u7684\u6027\u80fd\u3002\u4e0d\u5e78\u7684\u662f\u9019\u7a2e\u4e00\u822c\u6e96\u78ba\u7387\u7684\u8a55\u4f30\u65b9\u5f0f\u53ea\u5c0d\u985e\u5225\u5e73\u8861\u7684\u4e8c\u5143\u5206\u985e\u554f\u984c\u6709\u7528\u3002 \u7136\u800c\u5728\u5176\u4ed6\u7684\u72c0\u6cc1\u4e0b\u5b83\u662f\u4e00\u500b\u8aa4\u5c0e\u6027\u7684\u6307\u6a19\uff0c\u5373\u4f7f\u662f\u8868\u73fe\u6700\u5dee\u7684\u6a21\u578b\u4e5f\u53ef\u80fd\u80cc\u5f8c\u96b1\u85cf\u8457\u9ad8\u6e96\u78ba\u5ea6\u7684\u5206\u6578\u3002\u8209\u4f8b\u4f86\u8aaa\u6709\u500b\u5075\u6e2c\u5783\u573e\u90f5\u4ef6\u7684\u6a21\u578b\u5b83\u7684\u6e96\u78ba\u7387 
90%\uff0c\u4f46\u662f\u5be6\u969b\u4e0a\u5b83\u6839\u672c\u7121\u6cd5\u5075\u6e2c\u5230\u5783\u573e\u90f5\u4ef6\u3002\u9019\u662f\u70ba\u4ec0\u9ebc\uff1f\u7531\u65bc\u5783\u573e\u90f5\u4ef6\u4e26\u4e0d\u5e38\u898b\uff0c\u5206\u985e\u5668\u53ef\u4ee5\u6aa2\u6e2c\u6240\u6709\u975e\u5783\u573e\u90f5\u4ef6\uff0c\u5373\u4f7f\u5206\u985e\u5668\u5b8c\u5168\u7121\u6cd5\u9054\u5230\u5176\u76ee\u7684\u9019\u4e5f\u53ef\u4ee5\u63d0\u9ad8\u5176\u6e96\u78ba\u6027\u3002\u56e0\u70ba\u9019\u500b\u5206\u985e\u5668\u50c5\u53ef\u4ee5\u5206\u985e\u9019\u4e9b\u6b63\u5e38\u90f5\u4ef6\uff0c\u7a00\u5c11\u7684\u5783\u573e\u90f5\u4ef6\u6839\u672c\u8b8a\u8a8d\u4e0d\u51fa\u4f86\u3002 \u5c0d\u65bc\u591a\u5143\u985e\u5206\u985e\u7684\u554f\u984c\u66f4\u662f\u61c9\u8a72\u6ce8\u610f\u4f60\u7684\u6a21\u578b\u8a55\u4f30\u6307\u6a19\u3002\u5982\u679c\u9054\u5230 80% \u7684\u6e96\u78ba\u7387\uff0c\u662f\u5426\u610f\u5473\u8457\u6a21\u578b\u5728\u9810\u6e2c\u985e\u52251\u3001\u985e\u52252\u3001\u985e\u52253\u751a\u81f3\u6240\u6709\u985e\u6642\u4e00\u6a23\u6e96\u78ba\u5462\uff1f\u4e00\u822c\u7684\u6e96\u78ba\u7387\u6c38\u9060\u7121\u6cd5\u56de\u7b54\u6b64\u985e\u554f\u984c\uff0c\u4f46\u5e78\u904b\u7684\u662f\u5176\u4ed6\u5206\u985e\u6307\u6a19\u63d0\u4f9b\u4e86\u66f4\u591a\u7684\u8a0a\u606f\u6307\u6a19\u3002\u5b83\u5c31\u662f \u6df7\u6dc6\u77e9\u9663 (confusion matrix)\u3002 from sklearn.metrics import confusion_matrix y_true = [ 2 , 0 , 2 , 2 , 0 , 1 ] y_pred = [ 0 , 0 , 2 , 2 , 0 , 2 ] confusion_matrix ( y_true , y_pred ) array([[2, 0, 0], [0, 0, 1], [1, 0, 2]]) \u7d44\u6210\u6df7\u6dc6\u77e9\u9663\u7684\u56db\u500b\u5143\u7d20\u5206\u5225\u6709 
TP\u3001TN\u3001FP\u3001FN\u3002\u57fa\u672c\u4e0a\u6df7\u6dc6\u77e9\u9663\u6703\u62ff\u9019\u56db\u500b\u6307\u6a19\u505a\u53c3\u8003\uff0c\u540c\u6642\u7b97\u51fa\u4f86\u7684\u5206\u6578\u4e5f\u66f4\u80fd\u53bb\u8a55\u4f30\u4f60\u7684\u6a21\u578b\u8a13\u7df4\u7684\u7d50\u679c\u3002\u6b64\u5916\u6211\u5011\u53ef\u4ee5\u5229\u7528\u6df7\u6dc6\u77e9\u9663\u4f86\u8a08\u7b97 Precision\u3001Recall\u3001Accuracy \u7b49\u5206\u6578\u3002 TP(True Positive): \u6b63\u78ba\u9810\u6e2c\u6210\u529f\u7684\u6b63\u6a23\u672c\uff0c\u4f8b\u5982\u771f\u5be6\u7b54\u6848(Ground True)\u662f\u8c93\uff0c\u6210\u529f\u7684\u628a\u4e00\u5f35\u8c93\u7684\u7167\u7247\u9810\u6e2c\u6210\u8c93\uff0c\u5373\u70baTP TN(True Negative): \u6b63\u78ba\u9810\u6e2c\u6210\u529f\u7684\u8ca0\u6a23\u672c\uff0c\u6210\u529f\u7684\u628a\u4e00\u5f35\u72d7\u7684\u7167\u7247\u6a19\u793a\u6210\u4e0d\u662f\u8c93\uff0c\u5373\u70baTN FP(False Positive): \u932f\u8aa4\u9810\u6e2c\u6210\u6b63\u6a23\u672c\uff0c\u5be6\u969b\u4e0a\u70ba\u8ca0\u6a23\u672c\uff0c\u4f8b\u5982\uff1a\u932f\u8aa4\u7684\u628a\u4e00\u5f35\u72d7\u7684\u7167\u7247\u9810\u6e2c\u6210\u8c93 FN(False Negative): \u932f\u8aa4\u9810\u6e2c\u6210\u8ca0\u6a23\u672c\uff0c\u5be6\u969b\u4e0a\u70ba\u6b63\u6a23\u672c\uff0c\u4f8b\u5982\uff1a\u932f\u8aa4\u7684\u628a\u4e00\u5f35\u8c93\u7684\u7167\u7247\u9810\u6e2c\u6210\u4e0d\u662f\u8c93","title":"8. 
\u5206\u985e\u554f\u984c\u50c5\u4f7f\u7528\u6e96\u78ba\u7387\u4f5c\u70ba\u8861\u91cf\u6a21\u578b\u7684\u6307\u6a19"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#9-r2","text":"\u5728\u9810\u6e2c\u9023\u7e8c\u6027\u6578\u503c\u8f38\u51fa\u7684\u8ff4\u6b78\u6a21\u578b\u4e2d\uff0c\u5927\u5bb6\u5f80\u5f80\u6703\u76f4\u63a5\u547c\u53eb\u6a21\u578b\u63d0\u4f9b\u7684\u8a55\u4f30\u65b9\u6cd5\u76f4\u63a5\u8a08\u7b97 score \u3002\u7136\u800c\u9019\u500b\u5206\u6578\u5728\u8ff4\u6b78\u6a21\u578b\u4e2d\u662f\u8a08\u7b97 R2 \u5206\u6578\uff0c\u53c8\u7a31\u5224\u5b9a\u4fc2\u6578 (coefficient of determination)\u3002\u6240\u8b02\u7684\u5224\u5b9a\u4fc2\u6578\u662f\u8f38\u5165\u7279\u5fb5 (x) \u53bb\u89e3\u91cb\u8f38\u51fa (y) \u7684\u8b8a\u7570\u7a0b\u5ea6\u6709\u591a\u5c11\uff0c\u5176\u8a08\u7b97\u516c\u5f0f\u662f\uff1a\u8ff4\u6b78\u6a21\u578b\u7684\u8b8a\u7570\u91cf (SSR)/\u7e3d\u8b8a\u7570\u91cf (TSS) \u3002\u7528\u4ee5\u4e0b\u8b8a\u7570\u6578\u5206\u6790\u8868\uff08ANOVA table\uff09\u4f86\u8aaa TSS \u5c31\u662f\u8a08\u7b97\u7e3d\u8b8a\u7570\uff0c\u628a\u6bcf\u500b\u5be6\u969b\u7684 y \u6e1b\u53bb\u5e73\u5747\u6578\u7684\u5e73\u65b9\u52a0\u7e3d\u8d77\u4f86\u3002\u800c SSR \u5c31\u662f\u628a\u6240\u6709\u7684\u6a21\u578b\u9810\u6e2c y \u6e1b\u53bb\u5e73\u5747\u6578\u7684\u5e73\u65b9\u52a0\u7e3d\u8d77\u4f86\u3002\u5982\u679c R2 \u5206\u6578\u5f88\u9ad8\u8d8a\u63a5\u8fd1 1\uff0c\u8868\u793a\u6a21\u578b\u7684\u89e3\u91cb\u80fd\u529b\u5f88\u9ad8\u3002 \u5728\u5b78\u8853\u7814\u7a76\u4e0a\u6700\u76f4\u89ba\u7684\u89c0\u5ff5\u662f R2 \u5206\u6578\u6108\u63a5\u8fd1 1 \u8d8a\u597d\uff0c\u4e5f\u6709\u4e9b\u4eba\u900f\u904e\u4e00\u4e9b\u624b\u6bb5\u4f86\u88fd\u9020 R2 \u5206\u6578\u5f88\u9ad8\u7684\u5047\u8c61\uff0c\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u9019\u7bc7 \u6587\u7ae0 \u3002\u5176\u5be6\u53ea\u900f\u904e R2 
\u500b\u8a55\u4f30\u6307\u6a19\u5c31\u4f86\u6c7a\u5b9a\u4e00\u500b\u6a21\u578b\u7684\u597d\u58de\u662f\u4e0d\u592a\u597d\u7684\u7fd2\u6163\u3002\u66f4\u9032\u4e00\u6b65\u53ef\u4ee5\u4f7f\u7528 MSE\u3001MAE \u7b49\u6b98\u5dee\u7684\u8a55\u4f30\u503c\u6a19\u4f86\u770b\u6bcf\u7b46\u8cc7\u6599\u5be6\u969b\u503c\u8207\u9810\u6e2c\u503c\u7684\u8aa4\u5dee\u3002\u6216\u662f\u4f7f\u7528\u76f8\u5c0d\u8aa4\u5dee\u4f86\u89c0\u5bdf\u9810\u6e2c\u6a21\u578b\u7684\u53ef\u4fe1\u5ea6\u3002\u6b64\u5916\u7b46\u8005\u9084\u5efa\u8b70\u53ef\u4ee5\u8a66\u8457\u628a\u6bcf\u7b46\u8cc7\u6599\u7684\u771f\u5be6 y \u8207\u6a21\u578b\u9810\u6e2c\u7684 \u0177 \u7e6a\u88fd\u51fa\u4f86\uff0c\u82e5\u5448\u73fe\u4e00\u689d\u660e\u986f\u7684\u7531\u5de6\u4e0b\u5230\u53f3\u4e0a\u659c\u76f4\u7dda\uff0c\u5247\u8868\u793a\u6a21\u578b\u6240\u9810\u6e2c\u7684\u7d50\u679c\u8207\u771f\u5be6\u7b54\u6848\u5f88\u76f8\u8fd1\u3002","title":"9. \u8ff4\u6b78\u554f\u984c\u50c5\u4f7f\u7528 R2 \u5206\u6578\u8a55\u4f30\u6a21\u578b\u597d\u58de"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#10-ai","text":"\u8fd1\u5e7e\u5e74 AI \u7684\u767c\u5c55\u60f3\u5fc5\u5927\u5bb6\u6709\u76ee\u5171\u7779\uff0c\u5f9e\u5f71\u50cf\u8b58\u5225\u5230\u7269\u4ef6\u8fa8\u8b58\u7684\u6280\u8853\u6709\u8457\u91cd\u5927\u7684\u9032\u5c55\u3002\u6b64\u5916 2016 \u5e74 Google Deepmind \u5718\u968a\u7684 AlphaGo \u9996\u5ea6\u6253\u6557\u4eba\u985e\uff0c\u9019\u4e5f\u5728\u4eba\u6a5f\u5c0d\u5f08\u4e0a\u958b\u555f\u4e86\u4e00\u9805\u91cd\u8981\u7684\u91cc\u7a0b\u7891\u3002\u751a\u81f3\u5728\u81ea\u7136\u8a9e\u8a00\u65b9\u9762\uff0c\u6b78\u529f\u65bc\u65b0\u7684\u6a21\u578b\u67b6\u69cb\u8207\u786c\u9ad4\u8cc7\u6e90\u7684\u9032\u6b65\uff0c\u4f7f\u5f97\u81ea\u7136\u8a9e\u8a00\u6709\u91cd\u5927\u7684\u7a81\u7834\u3002\u770b\u5230\u9019\u9ebc\u591a AI \u7684\u7f8e\u597d\u8b93\u5927\u5bb6\u518d\u6b21\u5c0d\u6df1\u5ea6\u5b78\u7fd2\u9ede\u71c3\u5e0c\u671b\uff01\u53ea\u4e0d\u904e AI 
\u4e26\u975e\u842c\u80fd\uff0c\u5207\u8a18\uff01\u6240\u6709\u7684\u554f\u984c\u4e26\u4e0d\u662f\u5c07\u8cc7\u6599\u6536\u96c6\u597d\uff0c\u4e26\u5c07\u8cc7\u6599\u4e1f\u7d66\u96fb\u8166\u5b78\u7fd2\u5c31\u6703\u5f97\u5230\u4f60\u60f3\u8981\u7684\u7d50\u679c\u3002\u5927\u5bb6\u4e5f\u8a31\u6703\u9677\u5165\u300c\u70ba AI \u800c AI\u300d \u7684\u8ff7\u601d\uff0c\u5f88\u591a\u7684\u4efb\u52d9\u5176\u5be6\u900f\u904e\u5177\u6709\u898f\u5247\u7684\u5c08\u5bb6\u7cfb\u7d71\u6216\u662f\u50b3\u7d71\u6f14\u7b97\u6cd5\u5c31\u53ef\u4ee5\u9054\u5230\u5f88\u4e0d\u932f\u7684\u7d50\u679c\u3002\u518d\u8005\u6211\u5011\u90fd\u5c0d AI \u7684\u6280\u8853\u611f\u5230\u7279\u5225\u6b61\u559c\u8207\u671f\u5f85\uff0c\u4f46\u662f AI \u7684\u9ed1\u76d2\u5b50\u4eba\u985e\u5f80\u5f80\u4e0d\u77e5\u9053\u6a21\u578b\u4e0b\u4e00\u6b65\u6703\u7522\u751f\u4ec0\u9ebc\u4e0d\u53ef\u9810\u671f\u7684\u7d50\u679c\u3002\u5176\u5be6 AI \u6709\u5f88\u591a\u7684\u9650\u5236\u8207\u6311\u6230\uff0c\u9664\u4e86\u5efa\u7acb\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u4ee5\u5916\uff0c\u6211\u5011\u66f4\u9700\u8981\u95dc\u6ce8\u7684\u662f\u6a21\u578b\u5728\u60f3\u4ec0\u9ebc\u3002\u53ef\u89e3\u91cb\u4eba\u5de5\u667a\u6167\u5fc5\u7136\u662f\u6211\u5011\u8981\u63a2\u8a0e\u7684\u4e00\u6bb5\u8ab2\u984c\u3002AI \u8207\u6a5f\u5668\u4eba\u7684\u51fa\u73fe\u4e26\u4e0d\u662f\u8981\u53d6\u4ee3\u4eba\u985e\uff0c\u6211\u8a8d\u70ba AI \u6bd4\u8f03\u9069\u5408\u626e\u6f14\u8f14\u52a9\u4eba\u985e\u7684\u91cd\u8981\u89d2\u8272\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"10. 
\u4efb\u4f55\u4e8b\u60c5\u5225\u6025\u8457\u60f3\u7528 AI \u89e3\u6c7a"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/","text":"[Day 28] \u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u4f7f\u7528 pickle + gzip \u5132\u5b58\u6a21\u578b \u5c07\u8a13\u7df4\u597d\u7684\u6a21\u578b\u6253\u5305\u4e26\u5132\u5b58 \u8f09\u5165\u5132\u5b58\u7684\u6a21\u578b \u8b80\u53d6\u6253\u5305\u597d\u7684\u6a21\u578b\u4e26\u9810\u6e2c \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 \u4eca\u5929\u7684\u6559\u5b78\u5167\u5bb9\u8981\u6559\u5404\u4f4d\u5982\u4f55\u5c07\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5132\u5b58\uff0c\u4e26\u63d0\u4f9b\u4e0b\u4e00\u6b21\u8f09\u5165\u6a21\u578b\u548c\u9810\u6e2c\u3002\u5728\u672c\u7cfb\u5217\u7684\u6559\u5b78\u4e2d\u4ecb\u7d39\u4e86\u8a31\u591a Sklearn \u7684\u6a21\u578b\u6f14\u7b97\u6cd5\u3002\u7576\u6a21\u578b\u8a13\u7df4\u597d\u4e86\uff0c\u53ef\u4ee5\u5c07\u8a13\u7df4\u7d50\u679c\u5132\u5b58\u8d77\u4f86\uff0c\u4e26\u5efa\u7acb\u4e00\u500b API \u63a5\u53e3\u63d0\u4f9b\u6a21\u578b\u9810\u6e2c\u3002 \u6a21\u578b\u5132\u5b58\u65b9\u6cd5 \u5e38\u898b\u7684\u5132\u5b58\u6a21\u578b\u7684\u5957\u4ef6\u6709 pickle \u8207 joblib \u3002\u5176\u4e2d\u5728 [Day 20] \u6a5f\u5668\u5b78\u7fd2\u91d1\u624b\u6307 - Auto-sklearn \u6700\u5f8c\u6709\u4f7f\u7528 joblib \u4f86\u5132\u5b58\u6a21\u578b\uff0c\u64cd\u4f5c\u65b9\u6cd5\u4e5f\u975e\u5e38\u7c21\u55ae\u3002\u7136\u800c\u5728\u4eca\u5929\u7684\u6559\u5b78\u4e2d\u5247\u4f7f\u7528\u53e6\u4e00\u7a2e\u65b9\u6cd5 pickle \u4f86\u5132\u5b58\u6a21\u578b\u3002\u7531\u65bc pickle \u5132\u5b58\u6a21\u578b\u5f8c\u5bb9\u91cf\u53ef\u80fd\u6703\u6709\u597d\u5e7e\u767e MB \u56e0\u6b64\u5efa\u8b70\u53ef\u4ee5\u900f\u904e gzip \u4f86\u58d3\u7e2e\u6a21\u578b\u4e26\u5132\u5b58\u3002\u53e6\u5916\u5728 Python \u5b98\u65b9\u6587\u4ef6\u4e2d\u6709\u8b66\u544a\u7d55\u5c0d\u4e0d\u8981\u5229\u7528 pickle \u4f86 unpickle 
\u4f86\u8def\u4e0d\u660e\u7684\u6a94\u6848\u3002\u56e0\u70ba\u900f\u904e pickle \u6253\u5305\u6a21\u578b\u6703\u6709\u5b89\u5168\u6027\u7591\u616e\uff0c\u5305\u62ec arbitrary code execution \u7684\u554f\u984c\uff0c\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u9019\u7bc7 \u6587\u7ae0 \u3002\u5982\u679c\u8981\u8ffd\u6c42\u57f7\u884c\u901f\u5ea6\u8207\u5b89\u5168\u6027\uff0c\u5efa\u8b70\u53ef\u4ee5\u63a1\u7528 JSON \u683c\u5f0f\u4f86\u5b58\u53d6\u6a21\u578b\u7684\u53c3\u6578\u8207\u8a2d\u5b9a\u3002 \u5f8c\u8a18\uff1a\u9019\u5e7e\u5e74ONNX\u6a21\u578b\u901a\u7528\u683c\u5f0f\u4e5f\u975e\u5e38\u6d41\u884c\uff0c\u9664\u4e86\u795e\u7d93\u7db2\u8def\u4e4b\u5916\u4e5f\u652f\u63f4sklearn\u7684\u6a21\u578b\u5132\u5b58\u3002\u5927\u5bb6\u4e0d\u59a8\u4e5f\u53ef\u4ee5\u8a66\u8a66\u770b\uff01 1) \u8f09\u5165\u8cc7\u6599\u96c6 \u4eca\u65e5\u7684\u7bc4\u4f8b\u9084\u662f\u62ff\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u9032\u884c\u793a\u7bc4\u3002\u9996\u5148\u6211\u5011\u5148\u8f09\u5165\u8cc7\u6599\u96c6\u4e26\u9032\u884c\u8cc7\u6599\u7684\u5207\u5272\u3002 import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris iris = load_iris () df_data = pd . DataFrame ( data = np . c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data 2) \u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6 from sklearn.model_selection import train_test_split X = df_data . drop ( labels = [ 'Species' ], axis = 1 ) . values # \u79fb\u9664Species\u4e26\u53d6\u5f97\u5269\u4e0b\u6b04\u4f4d\u8cc7\u6599 y = df_data [ 'Species' ] . values X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.3 , random_state = 42 , stratify = y ) print ( 'train shape:' , X_train . shape ) print ( 'test shape:' , X_test . 
shape ) \u8a13\u7df4\u6a21\u578b - XGBoost XGBoost \u6a21\u578b\u662f\u76ee\u524d\u6700\u71b1\u9580\u7684\u6f14\u7b97\u6cd5\u6a21\u578b\u4e4b\u4e00\uff0c\u8a73\u7d30\u7684\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003 [Day 15] \u6a5f\u5668\u5b78\u7fd2\u5e38\u52dd\u8ecd - XGBoost \u3002\u88e1\u9762\u6703\u6709\u4ecb\u7d39\u8a73\u7d30\u7684\u6a21\u578b\u8aaa\u660e\u8207\u624b\u628a\u624b\u5be6\u4f5c\u3002\u7576\u7136\u5927\u5bb6\u4e5f\u53ef\u4ee5\u8a66\u8457\u7528\u5176\u4ed6 Sklearn \u7684\u6a21\u578b\u8a13\u7df4\u770b\u770b\uff0c\u4e00\u6a23\u53ef\u4ee5\u900f\u904e pickle \u4f86\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b\u3002 from xgboost import XGBClassifier # \u5efa\u7acb XGBClassifier \u6a21\u578b xgboostModel = XGBClassifier ( n_estimators = 100 , learning_rate = 0.3 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b xgboostModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = xgboostModel . predict ( X_train ) \u5132\u5b58 XGboost \u6a21\u578b \u5927\u5bb6\u53ef\u4ee5\u89c0\u5bdf .pickle \u8207 .gzip \u5169\u7a2e\u4e0d\u540c\u526f\u6a94\u540d\u5132\u5b58\u7d50\u679c\u6a94\u6848\u5927\u5c0f\u6709\u4f55\u5dee\u5225? 1. \u4f7f\u7528 pickle \u5132\u5b58\u6a21\u578b import pickle with open ( './model/xgboost-iris.pickle' , 'wb' ) as f : pickle . dump ( xgboostModel , f ) 2. \u4f7f\u7528 pickle \u5132\u5b58\u6a21\u578b\u4e26\u5229\u7528 gzip \u58d3\u7e2e import pickle import gzip with gzip . GzipFile ( './model/xgboost-iris.pgz' , 'w' ) as f : pickle . dump ( xgboostModel , f ) \u8f09\u5165 XGboost \u6a21\u578b \u8a66\u8457\u8f09\u5165\u5169\u7a2e\u4e0d\u540c\u683c\u5f0f\u7684\u6a21\u578b\uff0c\u4e26\u9810\u6e2c\u4e00\u7b46\u8cc7\u6599\u3002\u6ce8\u610f\u6a21\u578b\u9810\u6e2c\u8f38\u5165\u5fc5\u9808\u70ba numpy \u578b\u614b\uff0c\u4e14\u9808\u70ba\u4e8c\u7dad\u9663\u5217\u683c\u5f0f\u3002 1. \u8f09\u5165 gzip \u683c\u5f0f\u6a21\u578b import pickle import gzip #\u8b80\u53d6Model with gzip . 
open ( './model/xgboost-iris.pgz' , 'r' ) as f : xgboostModel = pickle . load ( f ) pred = xgboostModel . predict ( np . array ([[ 5.5 , 2.4 , 3.7 , 1. ]])) print ( pred ) 2. \u8f09\u5165 pickle \u683c\u5f0f\u6a21\u578b #\u8b80\u53d6Model with open ( './model/xgboost-iris.pickle' , 'rb' ) as f : xgboostModel = pickle . load ( f ) pred = xgboostModel . predict ( np . array ([[ 5.5 , 2.4 , 3.7 , 1. ]])) print ( pred ) Reference How to save and load your Scikit-learn models in a minute Don't Pickle Your Data \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 28] \u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#day-28","text":"","title":"[Day 28] \u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#_1","text":"\u4f7f\u7528 pickle + gzip \u5132\u5b58\u6a21\u578b \u5c07\u8a13\u7df4\u597d\u7684\u6a21\u578b\u6253\u5305\u4e26\u5132\u5b58 \u8f09\u5165\u5132\u5b58\u7684\u6a21\u578b \u8b80\u53d6\u6253\u5305\u597d\u7684\u6a21\u578b\u4e26\u9810\u6e2c \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#_2","text":"\u4eca\u5929\u7684\u6559\u5b78\u5167\u5bb9\u8981\u6559\u5404\u4f4d\u5982\u4f55\u5c07\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5132\u5b58\uff0c\u4e26\u63d0\u4f9b\u4e0b\u4e00\u6b21\u8f09\u5165\u6a21\u578b\u548c\u9810\u6e2c\u3002\u5728\u672c\u7cfb\u5217\u7684\u6559\u5b78\u4e2d\u4ecb\u7d39\u4e86\u8a31\u591a Sklearn \u7684\u6a21\u578b\u6f14\u7b97\u6cd5\u3002\u7576\u6a21\u578b\u8a13\u7df4\u597d\u4e86\uff0c\u53ef\u4ee5\u5c07\u8a13\u7df4\u7d50\u679c\u5132\u5b58\u8d77\u4f86\uff0c\u4e26\u5efa\u7acb\u4e00\u500b API 
\u63a5\u53e3\u63d0\u4f9b\u6a21\u578b\u9810\u6e2c\u3002","title":"\u524d\u8a00"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#_3","text":"\u5e38\u898b\u7684\u5132\u5b58\u6a21\u578b\u7684\u5957\u4ef6\u6709 pickle \u8207 joblib \u3002\u5176\u4e2d\u5728 [Day 20] \u6a5f\u5668\u5b78\u7fd2\u91d1\u624b\u6307 - Auto-sklearn \u6700\u5f8c\u6709\u4f7f\u7528 joblib \u4f86\u5132\u5b58\u6a21\u578b\uff0c\u64cd\u4f5c\u65b9\u6cd5\u4e5f\u975e\u5e38\u7c21\u55ae\u3002\u7136\u800c\u5728\u4eca\u5929\u7684\u6559\u5b78\u4e2d\u5247\u4f7f\u7528\u53e6\u4e00\u7a2e\u65b9\u6cd5 pickle \u4f86\u5132\u5b58\u6a21\u578b\u3002\u7531\u65bc pickle \u5132\u5b58\u6a21\u578b\u5f8c\u5bb9\u91cf\u53ef\u80fd\u6703\u6709\u597d\u5e7e\u767e MB \u56e0\u6b64\u5efa\u8b70\u53ef\u4ee5\u900f\u904e gzip \u4f86\u58d3\u7e2e\u6a21\u578b\u4e26\u5132\u5b58\u3002\u53e6\u5916\u5728 Python \u5b98\u65b9\u6587\u4ef6\u4e2d\u6709\u8b66\u544a\u7d55\u5c0d\u4e0d\u8981\u5229\u7528 pickle \u4f86 unpickle \u4f86\u8def\u4e0d\u660e\u7684\u6a94\u6848\u3002\u56e0\u70ba\u900f\u904e pickle \u6253\u5305\u6a21\u578b\u6703\u6709\u5b89\u5168\u6027\u7591\u616e\uff0c\u5305\u62ec arbitrary code execution \u7684\u554f\u984c\uff0c\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u9019\u7bc7 \u6587\u7ae0 \u3002\u5982\u679c\u8981\u8ffd\u6c42\u57f7\u884c\u901f\u5ea6\u8207\u5b89\u5168\u6027\uff0c\u5efa\u8b70\u53ef\u4ee5\u63a1\u7528 JSON \u683c\u5f0f\u4f86\u5b58\u53d6\u6a21\u578b\u7684\u53c3\u6578\u8207\u8a2d\u5b9a\u3002 
\u5f8c\u8a18\uff1a\u9019\u5e7e\u5e74ONNX\u6a21\u578b\u901a\u7528\u683c\u5f0f\u4e5f\u975e\u5e38\u6d41\u884c\uff0c\u9664\u4e86\u795e\u7d93\u7db2\u8def\u4e4b\u5916\u4e5f\u652f\u63f4sklearn\u7684\u6a21\u578b\u5132\u5b58\u3002\u5927\u5bb6\u4e0d\u59a8\u4e5f\u53ef\u4ee5\u8a66\u8a66\u770b\uff01","title":"\u6a21\u578b\u5132\u5b58\u65b9\u6cd5"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#1","text":"\u4eca\u65e5\u7684\u7bc4\u4f8b\u9084\u662f\u62ff\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u9032\u884c\u793a\u7bc4\u3002\u9996\u5148\u6211\u5011\u5148\u8f09\u5165\u8cc7\u6599\u96c6\u4e26\u9032\u884c\u8cc7\u6599\u7684\u5207\u5272\u3002 import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris iris = load_iris () df_data = pd . DataFrame ( data = np . c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data","title":"1) \u8f09\u5165\u8cc7\u6599\u96c6"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#2","text":"from sklearn.model_selection import train_test_split X = df_data . drop ( labels = [ 'Species' ], axis = 1 ) . values # \u79fb\u9664Species\u4e26\u53d6\u5f97\u5269\u4e0b\u6b04\u4f4d\u8cc7\u6599 y = df_data [ 'Species' ] . values X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.3 , random_state = 42 , stratify = y ) print ( 'train shape:' , X_train . shape ) print ( 'test shape:' , X_test . 
shape )","title":"2) \u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#-xgboost","text":"XGBoost \u6a21\u578b\u662f\u76ee\u524d\u6700\u71b1\u9580\u7684\u6f14\u7b97\u6cd5\u6a21\u578b\u4e4b\u4e00\uff0c\u8a73\u7d30\u7684\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003 [Day 15] \u6a5f\u5668\u5b78\u7fd2\u5e38\u52dd\u8ecd - XGBoost \u3002\u88e1\u9762\u6703\u6709\u4ecb\u7d39\u8a73\u7d30\u7684\u6a21\u578b\u8aaa\u660e\u8207\u624b\u628a\u624b\u5be6\u4f5c\u3002\u7576\u7136\u5927\u5bb6\u4e5f\u53ef\u4ee5\u8a66\u8457\u7528\u5176\u4ed6 Sklearn \u7684\u6a21\u578b\u8a13\u7df4\u770b\u770b\uff0c\u4e00\u6a23\u53ef\u4ee5\u900f\u904e pickle \u4f86\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b\u3002 from xgboost import XGBClassifier # \u5efa\u7acb XGBClassifier \u6a21\u578b xgboostModel = XGBClassifier ( n_estimators = 100 , learning_rate = 0.3 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b xgboostModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = xgboostModel . predict ( X_train )","title":"\u8a13\u7df4\u6a21\u578b - XGBoost"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#xgboost","text":"\u5927\u5bb6\u53ef\u4ee5\u89c0\u5bdf .pickle \u8207 .gzip \u5169\u7a2e\u4e0d\u540c\u526f\u6a94\u540d\u5132\u5b58\u7d50\u679c\u6a94\u6848\u5927\u5c0f\u6709\u4f55\u5dee\u5225?","title":"\u5132\u5b58 XGboost \u6a21\u578b"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#1-pickle","text":"import pickle with open ( './model/xgboost-iris.pickle' , 'wb' ) as f : pickle . dump ( xgboostModel , f )","title":"1. \u4f7f\u7528 pickle \u5132\u5b58\u6a21\u578b"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#2-pickle-gzip","text":"import pickle import gzip with gzip . GzipFile ( './model/xgboost-iris.pgz' , 'w' ) as f : pickle . dump ( xgboostModel , f )","title":"2. 
\u4f7f\u7528 pickle \u5132\u5b58\u6a21\u578b\u4e26\u5229\u7528 gzip \u58d3\u7e2e"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#xgboost_1","text":"\u8a66\u8457\u8f09\u5165\u5169\u7a2e\u4e0d\u540c\u683c\u5f0f\u7684\u6a21\u578b\uff0c\u4e26\u9810\u6e2c\u4e00\u7b46\u8cc7\u6599\u3002\u6ce8\u610f\u6a21\u578b\u9810\u6e2c\u8f38\u5165\u5fc5\u9808\u70ba numpy \u578b\u614b\uff0c\u4e14\u9808\u70ba\u4e8c\u7dad\u9663\u5217\u683c\u5f0f\u3002","title":"\u8f09\u5165 XGboost \u6a21\u578b"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#1-gzip","text":"import pickle import gzip #\u8b80\u53d6Model with gzip . open ( './model/xgboost-iris.pgz' , 'r' ) as f : xgboostModel = pickle . load ( f ) pred = xgboostModel . predict ( np . array ([[ 5.5 , 2.4 , 3.7 , 1. ]])) print ( pred )","title":"1. \u8f09\u5165 gzip \u683c\u5f0f\u6a21\u578b"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#2-pickle","text":"#\u8b80\u53d6Model with open ( './model/xgboost-iris.pickle' , 'rb' ) as f : xgboostModel = pickle . load ( f ) pred = xgboostModel . predict ( np . array ([[ 5.5 , 2.4 , 3.7 , 1. ]])) print ( pred )","title":"2. 
\u8f09\u5165 pickle \u683c\u5f0f\u6a21\u578b"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#reference","text":"How to save and load your Scikit-learn models in a minute Don't Pickle Your Data \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/","text":"[Day 29] \u4f7f\u7528 Python Flask \u67b6\u8a2d API \u5427\uff01 \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 API \u89c0\u5ff5\u8b1b\u89e3 \u4ec0\u9ebc\u662f API\uff1f RESTful API HTTP Request \u65b9\u6cd5 \u624b\u628a\u624b\u5be6\u4f5c\u4e00\u500b\u82b1\u6735\u5206\u985e\u5668 API \u900f\u904e Python Flask \u5efa\u7f6e\u4e00\u500b\u5f8c\u7aef\u9810\u6e2c\u6a21\u578b API \u7bc4\u4f8b\u7a0b\u5f0f\uff1a Code \u524d\u8a00 \u7576\u6a21\u578b\u8a13\u7df4\u5b8c\u4ee5\u5f8c\u4e0b\u4e00\u500b\u6b65\u9a5f\u5c31\u662f\u61c9\u7528\u8207\u843d\u5730\u3002\u6211\u5011\u53ef\u4ee5\u8a2d\u8a08\u4e00\u500b\u5d4c\u5165\u5f0f\u7cfb\u7d71\u8207\u4f7f\u7528\u8005\u4e92\u52d5\uff0c\u4f8b\u5982\u6a39\u8393\u6d3e\u3001Jetson Nano\u3001NeuroPilot...\u7b49\u786c\u9ad4\u4f86\u5354\u52a9 AI \u6a21\u578b\u7684\u908a\u7de3\u904b\u7b97\u3002\u6216\u662f\u8a2d\u8a08\u4e00\u500b\u624b\u6a5f APP \u4ee5\u53ca\u7db2\u9801\u61c9\u7528\u3002\u5f88\u591a\u4eba\u53ef\u80fd\u6703\u6709\u7591\u554f\u6a21\u578b\u8a13\u7df4\u597d\u7136\u5f8c\u4e0b\u4e00\u500b\u6b65\u9a5f\u8a72\u600e\u505a\uff1f\u6700\u5e38\u898b\u7684\u505a\u6cd5\u5c31\u662f\u5c07\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5132\u5b58\u8d77\u4f86\u4e26\u5efa\u7acb\u4e00\u500b API \u90e8\u7f72\u5728\u5f8c\u7aef\u4f3a\u670d\u5668\u4e2d\uff0c\u63a5\u8457\u4efb\u4f55\u7684\u7d42\u7aef\u8a2d\u5099\u90fd\u53ef\u4ee5\u900f\u904e\u9019\u4e00\u500b API 
\u9032\u884c\u8cc7\u6599\u7684\u5b58\u53d6\u8207\u6a21\u578b\u9810\u6e2c\u3002\u4e0b\u5716\u662f\u4e00\u500b\u7c21\u55ae\u7684\u6a21\u578b\u843d\u5730\u7684\u61c9\u7528\u60c5\u5883\uff0c\u6211\u5011\u53ef\u4ee5\u5728\u5f8c\u7aef\u4f3a\u670d\u5668\u90e8\u7f72\u6a21\u578b\u4e26\u5efa\u7acb\u4e00\u500b API \u7684\u63a5\u53e3\u8207\u524d\u7aef\u4f7f\u7528\u8005\u4e92\u52d5\u3002\u524d\u7aef\u7db2\u9801\u7684\u4f7f\u7528\u8005\u900f\u904e HTTP \u7684\u5354\u5b9a\u8207\u5f8c\u7aef\u4f3a\u670d\u5668\u9032\u884c\u901a\u8a0a\u8207\u8cc7\u6599\u4ea4\u63db\uff0c\u6700\u7d42\u6a21\u578b\u7684\u9810\u6e2c\u7d50\u679c\u6703\u56de\u50b3\u5230\u524d\u7aef\u4f7f\u7528\u8005\u4e26\u5c07\u7d50\u679c\u9078\u67d3\u5728\u7db2\u9801\u4e0a\u3002\u6211\u5011\u5ef6\u7e8c\u6628\u5929\u7684\u5167\u5bb9 [Day 28] \u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b \uff0c\u76ee\u524d\u5df2\u7d93\u6210\u529f\u7684\u8f38\u51fa\u6a21\u578b\u3002\u4eca\u5929\u5c31\u4f86\u6559\u5404\u4f4d\u5982\u4f55\u900f\u904e Python Flask \u67b6\u8a2d\u4e00\u500b\u9cf6\u5c3e\u82b1\u6735\u5206\u985e\u5668\u7684 API \u5427\uff01 \u4ec0\u9ebc\u662f API\uff1f \u6240\u8b02\u7684 API \u4e2d\u6587\u5168\u540d\u70ba\u61c9\u7528\u7a0b\u5f0f\u4ecb\u9762 (Application Programming Interface) \u662f\u5c6c\u65bc\u5ba2\u6236\u7aef\u8207\u4f3a\u670d\u7aef\u7684\u6e9d\u901a\u6a4b\u6a11\uff0c\u5b83\u63d0\u4f9b\u4e00\u500b\u7aef\u53e3\u80fd\u5920\u9032\u884c\u8cc7\u6599\u4ea4\u63db\u3002\u7c21\u55ae\u4f86\u8aaa\u662f\u4e00\u500b\u524d\u7aef\u8207\u5f8c\u7aef\u7684\u4e00\u500b\u6e9d\u901a\u4ecb\u9762\u3002 \u53e6\u5916\u5927\u5bb6\u53ef\u80fd\u807d\u904e\u4e00\u500b\u540d\u8a5e\u53eb\u505a RESTful API\u3002\u6240\u8b02\u7684 REST \u70ba Representational State Transfer \u7684\u7e2e\u5beb\u662f\u4e00\u7a2e\u7db2\u8def\u67b6\u69cb\u98a8\u683c\uff0c\u8fd1\u5e7e\u5e74\u4f86 REST \u7684\u6982\u5ff5\u5df2\u7d93\u88ab\u5be6\u4f5c\u5728\u5927\u578b\u7db2\u8def\u7cfb\u7d71\u4e2d\uff0c\u800c\u5728 Web \u670d\u52d9\u4e2d\u4f7f\u7528 REST 
\u6982\u5ff5\u88ab\u5be6\u4f5c\u51fa\u4f86\u7684 API \u5c31\u7c21\u7a31\u70ba RESTful API \u4ed6\u662f\u4f7f\u7528 HTTP \u7684\u5354\u5b9a\u5b8c\u6574\u5b9a\u7fa9 Web \u670d\u52d9\u5728 HTTP Request \u7684\u5404\u7a2e\u6d41\u7a0b\u3002 HTTP Request \u65b9\u6cd5 \u900f\u904e\u7db2\u8def\u5354\u5b9a HTTP Request \u4e0d\u540c\u7684\u65b9\u6cd5\uff0c\u53ef\u4ee5\u5be6\u73fe\u4e0d\u540c\u7684\u8cc7\u6599\u4ea4\u63db\u8acb\u6c42\u65b9\u5f0f\u3002HTTP \u672c\u8eab\u5c31\u662f REST \u7684\u5be6\u4f5c\uff0c\u6240\u8b02\u7684 HTTP Request \u5b9a\u7fa9\u4e86\u516b\u7a2e\u8acb\u6c42\u65b9\u6cd5\u5206\u5225\u70ba\uff1a GET \uff1a\u6b64\u65b9\u6cd5\u53ea\u80fd\u5411\u6307\u5b9a\u7684\u8cc7\u6e90\u8981\u6c42\u53d6\u5f97\u8cc7\u6599\uff0c\u4e26\u4e0d\u6703\u66f4\u52d5\u5230\u5167\u90e8\u8cc7\u6e90\u3002 HEAD\uff1aHEAD \u8ddf GET \u65b9\u6cd5\u985e\u4f3c\u53ea\u5dee\u5225\u5728\u5b83\u4e26\u4e0d\u6703\u56de\u50b3\u4f60\u6240\u8acb\u6c42\u7684\u8cc7\u6e90\u5728 body \u4e0a\uff0c\u53ea\u56de\u50b3 HTTP header\u3002 POST \uff1a\u5411\u6307\u5b9a\u7684\u8cc7\u6e90\u63d0\u4ea4\u8cc7\u6599\u3002 PUT \uff1a\u5411\u6307\u5b9a\u8cc7\u6e90\u4f4d\u7f6e\u63d0\u4ea4\u66f4\u65b0\u5167\u5bb9\u3002 DELETE \uff1a\u5411\u6307\u5b9a\u8cc7\u6e90\u4f4d\u7f6e\u8acb\u6c42\u522a\u9664\u5167\u5bb9\u3002 CONNECT\uff1aHTTP/1.1\u5354\u8b70\u4e2d\u9810\u7559\u7d66\u80fd\u5920\u5c07\u9023\u63a5\u6539\u70ba\u7ba1\u9053\u65b9\u5f0f\u7684\u4ee3\u7406\u670d\u52d9\u5668\u3002 OPTIONS\uff1a\u6b64\u65b9\u6cd5\u53ef\u4f7f\u670d\u52d9\u5668\u50b3\u56de\u8a72\u8cc7\u6e90\u6240\u652f\u6301\u7684\u6240\u6709 HTTP \u8acb\u6c42\u65b9\u6cd5\u3002 TRACE\uff1a\u56de\u986f\u670d\u52d9\u5668\u6536\u5230\u7684\u8acb\u6c42\uff0c\u4e3b\u8981\u7528\u65bc\u6e2c\u8a66\u6216\u8a3a\u65b7\u3002 [\u7a0b\u5f0f\u5be6\u4f5c] \u9cf6\u5c3e\u82b1\u6735\u5206\u985e\u5668 API \u5efa\u7acb Python Flask API Flask \u662f\u4e00\u500b\u4f7f\u7528 Python \u8a9e\u8a00\u7de8\u5beb\u7684\u8f15\u91cf\u7d1a Web 
\u61c9\u7528\u6846\u67b6\u3002\u5728\u4eca\u65e5\u7684\u6587\u7ae0\u4e2d\u6211\u5011\u5c07\u5ef6\u7e8c\u6628\u5929\u6240\u5132\u5b58\u7684\u9cf6\u5c3e\u82b1\u6735\u5206\u985e\u5668\u6a21\u578b\uff0c\u5efa\u7acb\u4e00\u500b\u82b1\u6735\u5206\u985e\u9810\u6e2c\u7684 API\u3002\u4f7f\u7528\u8005\u53ef\u4ee5\u900f\u904e POST \u5354\u5b9a\u5f9e\u524d\u7aef\u7db2\u9801\u767c\u9001\u56db\u500b\u6578\u503c\u5206\u5225\u70ba\u82b1\u843c\u7684\u9577\u8207\u5bec\u4ee5\u53ca\u82b1\u74e3\u7684\u9577\u8207\u5bec\u3002\u5f8c\u7aef\u7a0b\u5f0f\u6536\u5230\u6578\u503c\u5f8c\u9001\u7d66\u4e8b\u5148\u6253\u5305\u597d\u7684\u6a21\u578b\uff0c\u4e26\u5c07\u6a21\u578b\u9810\u6e2c\u7d50\u679c\u900f\u904e JSON \u683c\u5f0f\u56de\u50b3\u5230\u524d\u7aef\u4f7f\u7528\u8005\u3002\u4ee5\u4e0b\u70ba\u7a0b\u5f0f\u6574\u500b\u6a39\u72c0\u7d50\u69cb\uff0c\u5176\u4e2d\u5728\u6700\u5916\u5c64\u8cc7\u6599\u593e\u6709\u4e09\u500b\u6a94\u6848\u5206\u5225\u6709\u5c07\u6a21\u578b\u5c01\u88dd\u6210\u51fd\u5f0f\u7684 model.py \u8207 Flask \u4e3b\u7a0b\u5f0f run.py \u4ee5\u53ca\u8ca0\u8cac\u7ba1\u7406\u5c08\u6848\u5957\u4ef6\u7684 requirements.txt \u3002\u53e6\u5916\u5728 model \u8cc7\u6599\u593e\u4e2d\u8ca0\u8cac\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b\u58d3\u7e2e\u6a94\u3002 . 
\u251c\u2500\u2500 model \u2502 \u2514\u2500\u2500 xgboost-iris.pgz \u251c\u2500\u2500 model.py \u251c\u2500\u2500 requirements.txt \u2514\u2500\u2500 run.py \u5c01\u88dd\u9810\u6e2c\u6a21\u578b (model.py) \u9996\u5148\u5efa\u7acb\u4e00\u500b model.py \u6a94\u6848\uff0c\u5728\u9019\u500b\u6a94\u6848\u4e2d\u6211\u5011\u8981\u8f09\u5165\u4e8b\u5148\u8a13\u7df4\u597d\u7684\u6a21\u578b\u4e26\u5c07\u5b83\u5c01\u88dd\u6210\u4e00\u500b function \u6216\u662f class\u3002\u5728\u672c\u7bc4\u4f8b\u7a0b\u5f0f\u4e2d\u6211\u5011\u662f\u5efa\u7acb\u4e00\u500b predict() \u7684\u51fd\u5f0f\u4e26\u4e14\u5141\u8a31\u63a5\u6536\u4e00\u500b Numpy \u7684\u9663\u5217\uff0c\u5176\u4e2d\u88e1\u9762\u5141\u8a31\u593e\u5e36\u56db\u500b\u82b1\u6735\u7279\u5fb5\u7684\u6578\u503c\u3002\u6700\u5f8c\u5c07\u6a21\u578b\u9810\u6e2c\u7d50\u679c\u5b58\u653e\u5728 pred \u8b8a\u6578\u4e2d\uff0c\u4e26\u5c07\u9810\u6e2c\u7684\u985e\u5225\u56de\u50b3\u3002 # -*- coding: UTF-8 -*- import pickle import gzip # \u8f09\u5165\u6a21\u578b with gzip . open ( './model/xgboost-iris.pgz' , 'rb' ) as f : xgboostModel = pickle . load ( f ) # \u5c07\u6a21\u578b\u9810\u6e2c\u5beb\u6210\u4e00\u500b function def predict ( input ): pred = xgboostModel . 
predict ( input )[ 0 ] return pred \u5efa\u7acb Flask API (run.py) \u63a5\u8457\u6211\u5011\u8981\u900f\u904e Flask \u5efa\u7acb\u4e00\u500b API\uff0c\u9996\u5148\u8981\u8a2d\u5b9a\u958b\u653e\u8de8\u7db2\u57df CORS \u6b0a\u9650\u3002\u6240\u8b02\u7684\u8de8\u4f86\u6e90\u8cc7\u6e90\u5171\u4eab (Cross-Origin Resource Sharing, CORS) \u662f\u4e00\u7a2e\u4f7f\u7528\u984d\u5916 HTTP \u6a19\u982d\u4f86\u8b93\u76ee\u524d\u700f\u89bd\u7db2\u7ad9\u7684\u4f7f\u7528\u8005\u80fd\u8a2a\u554f\u4e0d\u540c\u4f86\u6e90\u7db2\u57df\u7684\u4f3a\u670d\u5668\u3002\u7576\u4f7f\u7528\u8005\u8acb\u6c42\u4e00\u500b\u4f86\u81ea\u65bc\u4e0d\u540c\u7db2\u57df\u3001\u901a\u8a0a\u5354\u5b9a\u6216\u901a\u8a0a\u57e0\u7684\u8cc7\u6e90\u6642\uff0c\u6703\u5efa\u7acb\u4e00\u500b\u8de8\u4f86\u6e90 HTTP \u8acb\u6c42\u3002\u6240\u4ee5\u5728\u64b0\u5beb\u7a0b\u5f0f\u7684\u6642\u5019\u5fc5\u9808\u900f\u904e flask_cors \u88e1\u9762\u6240\u63d0\u4f9b\u7684 CORS \u6dfb\u52a0\u8de8\u4f86\u6e90\u8cc7\u6e90\u5171\u4eab\u3002\u9019\u6a23\u524d\u7aef\u4f7f\u7528\u8005\u5728\u4e0d\u540c\u7db2\u57df\u5229\u7528 ajax \u6216 fetch \u5b58\u53d6 API \u6642\u5c31\u6703\u6709\u8b80\u53d6\u6b0a\u9650\u3002 # -*- coding: UTF-8 -*- import numpy as np import model from flask import Flask , request , jsonify from flask_cors import CORS app = Flask ( __name__ ) CORS ( app ) \u63a5\u8457\u6211\u5011\u5148\u793a\u7bc4\u5efa\u7acb\u4e00\u500b GET \u7684\u8def\u7531 @app.route('/') \uff0c\u55ae\u5f15\u865f\u5167\u7684\u5167\u5bb9\u5373\u4ee3\u8868\u4f7f\u7528\u8005\u5728\u547c\u53eb API \u7684\u8def\u5f91\u4f4d\u7f6e / \u4ee3\u8868\u662f root \u7684\u610f\u601d\u3002\u5728\u9019\u4e00\u500b\u6e2c\u8a66\u7684\u8def\u7531\u4e2d\u6211\u5011\u76f4\u63a5\u56de\u50b3\u4e00\u500b hello!! 
\u7684\u5b57\u4e32\u3002\u7a0d\u5f8c\u5c07\u6703\u6559\u5404\u4f4d\u5982\u4f55\u900f\u904e Postman \u9019\u500b\u8edf\u9ad4\u4f86\u6e2c\u8a66 API\u3002\u53e6\u5916\u7b2c\u4e8c\u500b\u8def\u7531\u662f\u8ca0\u8cac\u63a5\u6536\u82b1\u6735\u56db\u500b\u6578\u503c\uff0c\u4e26\u5c07\u9019\u56db\u500b\u6578\u503c\u653e\u5230 Numpy \u9663\u5217\u4e2d\u9001\u5230\u7a0d\u65e9\u4ee5\u5c01\u88dd\u597d\u7684 mpdel.py \u4e2d\u7684 predict() \u65b9\u6cd5\u3002\u4e26\u5c07\u9810\u6e2c\u7684\u7d50\u679c\u900f\u904e JSON \u683c\u5f0f\u56de\u61c9\u7d66\u524d\u7aef\u4f7f\u7528\u8005\u3002\u503c\u5f97\u6ce8\u610f\u7684\u662f\u9019\u4e00\u500b\u8def\u7531\u6211\u5011\u662f\u8a2d\u5b9a\u4ed6\u7684\u8def\u5f91\u70ba /predict \u4ee5\u53ca HTTP Requesst \u7684\u65b9\u6cd5\u6307\u5b9a methods=['POST'] \u3002 @app . route ( '/' ) def index (): return 'hello!!' @app . route ( '/predict' , methods = [ 'POST' ]) def postInput (): # \u53d6\u5f97\u524d\u7aef\u50b3\u904e\u4f86\u7684\u6578\u503c insertValues = request . get_json () x1 = insertValues [ 'sepalLengthCm' ] x2 = insertValues [ 'sepalWidthCm' ] x3 = insertValues [ 'petalLengthCm' ] x4 = insertValues [ 'petalWidthCm' ] input = np . array ([[ x1 , x2 , x3 , x4 ]]) result = model . 
predict ( input ) return jsonify ({ 'return' : str ( result )}) \u6700\u5f8c\u6211\u5011\u900f\u904e app.run() \u5c07\u6b64 API \u90e8\u7f72\u5728\u4f3a\u670d\u5668\u7684 3000 PORT \u7576\u4e2d\u3002 host='0.0.0.0' \u8868\u793a\u9810\u8a2d\u8def\u7531\u5c07\u6703\u81ea\u52d5\u5e6b\u4f60\u53d6\u5f97\u76ee\u524d\u4f3a\u670d\u5668\u7684\u56fa\u5b9a IP \u4f4d\u7f6e\u3002\u7531\u65bc\u6211\u5011\u76ee\u524d\u5728\u672c\u6a5f\u958b\u767c\u7b49\u7b49\u6e2c\u8a66\u6642\u53ef\u4ee5\u76f4\u63a5\u4f7f\u7528 http://localhost:3000 \u9032\u884c\u6e2c\u8a66\u3002\u53e6\u5916\u53c3\u6578 debug \u8a2d\u5b9a\u70ba True \u5373\u8868\u793a API \u88ab\u555f\u52d5\u6642\u6703\u81ea\u52d5\u76e3\u807d\u7a0b\u5f0f\u662f\u5426\u6709\u8b8a\u52d5\uff0c\u5982\u679c\u6709\u66f4\u65b0\u5167\u5bb9\u5373\u6703\u7acb\u523b\u91cd\u65b0\u555f\u52d5 API\u3002\u6b64\u8a2d\u5b9a\u9069\u5408\u5728\u958b\u767c\u6642\u5019\u4f7f\u7528\uff0c\u800c\u771f\u6b63\u4e0a\u7dda\u6642\u518d\u8abf\u6210 False\u3002 if __name__ == '__main__' : app . 
run ( host = '0.0.0.0' , port = 3000 , debug = True ) \u7ba1\u7406\u5957\u4ef6\u7248\u672c (requirements.txt) requirements.txt \u9019\u4e00\u652f\u6a94\u6848\u662f\u8ca0\u8a18\u9304\u4e86\u7576\u524d\u5c08\u6848\u8cc7\u6599\u593e\u4e0b\u7a0b\u5f0f\u6240\u6709\u4f9d\u8cf4\u7684\u5957\u4ef6\u53ca\u76f8\u5c0d\u61c9\u7684\u7248\u672c\u3002\u4e0b\u5217\u4e94\u500b\u662f\u5728\u672c\u5be6\u4f5c\u4e2d\u5c07\u6703\u4f7f\u7528\u5230\u7684\u5957\u4ef6\uff0c\u82e5\u5957\u4ef6\u5f8c\u9762\u6c92\u6709\u7279\u5225\u6307\u5b9a\u7248\u672c\u865f\uff0c\u5b89\u88dd\u6642\u5c07\u6703\u81ea\u52d5\u5b89\u88dd\u6700\u65b0\u7684\u7248\u672c\u3002 Flask Flask-Cors numpy scikit-learn xgboost \u5047\u8a2d\u7a0b\u5f0f\u5728\u53e6\u4e00\u53f0\u96fb\u8166\u4e0a\u57f7\u884c\u6642\uff0c\u8981\u4e00\u500b\u4e00\u500b\u5b89\u88dd\u5957\u4ef6\u5f88\u9ebb\u7169\u3002\u56e0\u6b64\u53ef\u4ee5\u76f4\u63a5\u900f\u904e requirements.txt \u7d00\u9304\u5c08\u6848\u4e2d\u4f9d\u8cf4\u7684\u5957\u4ef6\u3002\u4e26\u4e14\u8f38\u5165\u4ee5\u4e0b\u6307\u4ee4\u5373\u53ef\u4e00\u6b21\u5b89\u88dd\u6240\u6709\u6307\u5b9a\u7684\u5957\u4ef6\u3002 pip install -r requirements.txt \u57f7\u884c API \u5728\u672c\u6a5f\u6216\u958b\u767c\u74b0\u5883\u4e2d\u6e2c\u8a66\u57f7\u884c API \u7684\u65b9\u5f0f\u5f88\u7c21\u55ae\u3002\u53ea\u8981\u958b\u555f\u7d42\u7aef\u6a5f\u4e26\u8f38\u5165\u4ee5\u4e0b\u6307\u4ee4\u5373\u53ef\uff1a python run.py \u7a0b\u5f0f\u771f\u6b63\u4e0a\u7dda\u6642\u5efa\u8b70\u4f7f\u7528 gunicorn \u6216 Waitress \u4f86\u7522\u751f WSGI \u670d\u52d9\uff0c\u4e26\u65bc\u80cc\u666f\u904b\u884c \u9084\u8a18\u5f97\u6211\u5011\u6709\u5beb\u4e00\u500b GET \u65b9\u6cd5\u7684\u6e2c\u8a66\u8def\u7531\u55ce\uff1f\u9019\u6642\u5019\u5927\u5bb6\u53ef\u4ee5\u958b\u555f\u96fb\u8166\u4e2d\u7684\u700f\u89bd\u5668\u4e26\u5728\u7db2\u5740\u5217\u8f38\u5165 http://localhost:3000 \u5373\u53ef\u7acb\u5373\u770b\u5230 API 
\u5728\u6307\u5b9a\u7684\u8def\u5f91\u4e0b\u6240\u56de\u61c9\u7684\u5167\u5bb9\u3002\u5982\u679c\u51fa\u73fe\u4ee5\u4e0b\u756b\u9762\u5373\u4ee3\u8868 API \u5df2\u7d93\u6b63\u5e38\u7684\u88ab\u904b\u884c\u56c9\u3002 \u90a3\u4f60\u53ef\u80fd\u6703\u554f\u6211\u8a72\u600e\u9ebc\u6e2c\u8a66\u53e6\u4e00\u500b POST \u65b9\u6cd5\u5462\uff1f\u7531\u65bc GET \u65b9\u6cd5\u6bd4\u8f03\u597d\u8655\u7406\uff0c\u6211\u5011\u76f4\u63a5\u5728\u700f\u89bd\u5668\u8f38\u5165\u8def\u5f91\u5c31\u80fd\u7acb\u5373\u89c0\u770b\u7d50\u679c\u3002\u90a3\u7576\u6211\u5011\u8981\u6e2c\u8a66 POST\u3001PUT\u3001DELETE \u7b49\u65b9\u6cd5\u6642\u5c31\u5fc5\u9808\u4f9d\u9760\u7b2c\u4e09\u65b9\u8edf\u9ad4 Postman \u4f86\u5354\u52a9\u6a21\u64ec HTTP Request \u5b8c\u6210 API \u6e2c\u8a66\u3002 \u6e2c\u8a66 API \u7684\u597d\u5de5\u5177 Postman \u7576\u4f60\u5beb\u597d\u4e00\u652f API \u6642\u8981\u99ac\u4e0a\u6e2c\u8a66\u770b\u770b\u4f60\u5beb\u7684\u7a0b\u5f0f\u908f\u8f2f\u662f\u5426\u6b63\u78ba\uff0c\u5c31\u53ef\u4ee5\u4f7f\u7528 Postman \u9019\u500b\u8edf\u9ad4\u4f86\u505a API \u6e2c\u8a66\u3002Postman \u4ed6\u662f\u4e00\u500b\u80fd\u5920\u6a21\u64ec HTTP Request \u7684\u5de5\u5177\u80fd\u5920\u8b93\u4f60\u7c21\u55ae\u5feb\u901f\u7684\u6e2c\u8a66\u4f60\u7684 API\uff0c\u4e26\u4e14\u5167\u5efa\u5305\u542b\u8a31\u591a HTTP \u7684\u8acb\u6c42\u65b9\u5f0f\uff0c\u4f8b\u5982\u5e38\u898b\u7684 GET(\u53d6\u5f97)\u3001POST(\u65b0\u589e)\u3001PUT(\u4fee\u6539)\u3001DELETE(\u522a\u9664)\u3002\u9996\u5148\u5927\u5bb6\u53ef\u4ee5\u5230 \u5b98\u7db2 \u4e0b\u8f09\u8207\u5b89\u88dd\u3002 \u5b89\u88dd\u597d\u4e4b\u5f8c\u53ef\u4ee5\u6253\u958b\u7a0b\u5f0f\u4e26\u9ede\u9078 POST \u4e26\u8cbc\u4e0a API \u7db2\u5740\u3002\u7531\u65bc\u6211\u5011\u73fe\u5728\u8981\u6e2c\u8a66\u53e6\u4e00\u500b\u9810\u6e2c\u7684\u8def\u5f91 predict \uff0c\u56e0\u6b64\u5728\u7db2\u5740\u5217\u8cbc\u4e0a http://localhost:3000/predict \u3002\u9ede\u9078 Body-> raw -> JSON \u4e26\u5c07\u82b1\u6735\u7684\u56db\u500b\u53c3\u6578\u4ee5 JSON 
\u683c\u5f0f\u9032\u884c\u63cf\u8ff0\u3002 { \"sepalLengthCm\" : 5.9 , \"sepalWidthCm\" : 3 , \"petalLengthCm\" : 5.1 , \"petalWidthCm\" : 1.8 } \u9ede\u9078 send \u5f8c\u5373\u53ef\u5c07\u6a21\u64ec\u7684\u56db\u500b\u6578\u503c\u900f\u904e JSON \u683c\u5f0f\u4f7f\u7528 POST \u65b9\u6cd5\u50b3\u9001\u5230\u5f8c\u7aef API \u4e2d\u7684 predict \u8def\u5f91\u3002\u8a72 API \u900f\u904e POST \u63a5\u6536\u5230\u524d\u7aef\u4f7f\u7528\u8005\u6240\u767c\u9001\u7684\u8a0a\u606f\u5f8c\uff0c\u89e3\u6790\u9019\u56db\u500b\u6578\u503c\u4e26\u4f9d\u5e8f\u653e\u5728\u9663\u5217\u4e2d\u4e26\u9032\u884c\u6a21\u578b\u9810\u6e2c\u3002\u6700\u7d42\u9810\u6e2c\u7d50\u679c\u6703\u5c07\u82b1\u7684\u7a2e\u985e\u4ee5 JSON \u683c\u5f0f\u56de\u50b3\u5230\u524d\u7aef\u4f7f\u7528\u8005\u3002\u6b64\u6642\u524d\u7aef\u7684\u7db2\u9801\u8a2d\u8a08\u5e2b\u5c31\u53ef\u4ee5\u5c07\u62ff\u5230\u7684\u9810\u6e2c\u7d50\u679c\u9032\u884c\u524d\u7aef\u7684\u756b\u9762\u6e32\u67d3\u8207\u66f4\u65b0\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 29] \u4f7f\u7528 Python Flask \u67b6\u8a2d API \u5427\uff01"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#day-29-python-flask-api","text":"","title":"[Day 29] \u4f7f\u7528 Python Flask \u67b6\u8a2d API \u5427\uff01"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#_1","text":"API \u89c0\u5ff5\u8b1b\u89e3 \u4ec0\u9ebc\u662f API\uff1f RESTful API HTTP Request \u65b9\u6cd5 \u624b\u628a\u624b\u5be6\u4f5c\u4e00\u500b\u82b1\u6735\u5206\u985e\u5668 API \u900f\u904e Python Flask \u5efa\u7f6e\u4e00\u500b\u5f8c\u7aef\u9810\u6e2c\u6a21\u578b API \u7bc4\u4f8b\u7a0b\u5f0f\uff1a 
Code","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#_2","text":"\u7576\u6a21\u578b\u8a13\u7df4\u5b8c\u4ee5\u5f8c\u4e0b\u4e00\u500b\u6b65\u9a5f\u5c31\u662f\u61c9\u7528\u8207\u843d\u5730\u3002\u6211\u5011\u53ef\u4ee5\u8a2d\u8a08\u4e00\u500b\u5d4c\u5165\u5f0f\u7cfb\u7d71\u8207\u4f7f\u7528\u8005\u4e92\u52d5\uff0c\u4f8b\u5982\u6a39\u8393\u6d3e\u3001Jetson Nano\u3001NeuroPilot...\u7b49\u786c\u9ad4\u4f86\u5354\u52a9 AI \u6a21\u578b\u7684\u908a\u7de3\u904b\u7b97\u3002\u6216\u662f\u8a2d\u8a08\u4e00\u500b\u624b\u6a5f APP \u4ee5\u53ca\u7db2\u9801\u61c9\u7528\u3002\u5f88\u591a\u4eba\u53ef\u80fd\u6703\u6709\u7591\u554f\u6a21\u578b\u8a13\u7df4\u597d\u7136\u5f8c\u4e0b\u4e00\u500b\u6b65\u9a5f\u8a72\u600e\u505a\uff1f\u6700\u5e38\u898b\u7684\u505a\u6cd5\u5c31\u662f\u5c07\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5132\u5b58\u8d77\u4f86\u4e26\u5efa\u7acb\u4e00\u500b API \u90e8\u7f72\u5728\u5f8c\u7aef\u4f3a\u670d\u5668\u4e2d\uff0c\u63a5\u8457\u4efb\u4f55\u7684\u7d42\u7aef\u8a2d\u5099\u90fd\u53ef\u4ee5\u900f\u904e\u9019\u4e00\u500b API \u9032\u884c\u8cc7\u6599\u7684\u5b58\u53d6\u8207\u6a21\u578b\u9810\u6e2c\u3002\u4e0b\u5716\u662f\u4e00\u500b\u7c21\u55ae\u7684\u6a21\u578b\u843d\u5730\u7684\u61c9\u7528\u60c5\u5883\uff0c\u6211\u5011\u53ef\u4ee5\u5728\u5f8c\u7aef\u4f3a\u670d\u5668\u90e8\u7f72\u6a21\u578b\u4e26\u5efa\u7acb\u4e00\u500b API \u7684\u63a5\u53e3\u8207\u524d\u7aef\u4f7f\u7528\u8005\u4e92\u52d5\u3002\u524d\u7aef\u7db2\u9801\u7684\u4f7f\u7528\u8005\u900f\u904e HTTP \u7684\u5354\u5b9a\u8207\u5f8c\u7aef\u4f3a\u670d\u5668\u9032\u884c\u901a\u8a0a\u8207\u8cc7\u6599\u4ea4\u63db\uff0c\u6700\u7d42\u6a21\u578b\u7684\u9810\u6e2c\u7d50\u679c\u6703\u56de\u50b3\u5230\u524d\u7aef\u4f7f\u7528\u8005\u4e26\u5c07\u7d50\u679c\u9078\u67d3\u5728\u7db2\u9801\u4e0a\u3002\u6211\u5011\u5ef6\u7e8c\u6628\u5929\u7684\u5167\u5bb9 [Day 28] \u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b 
\uff0c\u76ee\u524d\u5df2\u7d93\u6210\u529f\u7684\u8f38\u51fa\u6a21\u578b\u3002\u4eca\u5929\u5c31\u4f86\u6559\u5404\u4f4d\u5982\u4f55\u900f\u904e Python Flask \u67b6\u8a2d\u4e00\u500b\u9cf6\u5c3e\u82b1\u6735\u5206\u985e\u5668\u7684 API \u5427\uff01","title":"\u524d\u8a00"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#api","text":"\u6240\u8b02\u7684 API \u4e2d\u6587\u5168\u540d\u70ba\u61c9\u7528\u7a0b\u5f0f\u4ecb\u9762 (Application Programming Interface) \u662f\u5c6c\u65bc\u5ba2\u6236\u7aef\u8207\u4f3a\u670d\u7aef\u7684\u6e9d\u901a\u6a4b\u6a11\uff0c\u5b83\u63d0\u4f9b\u4e00\u500b\u7aef\u53e3\u80fd\u5920\u9032\u884c\u8cc7\u6599\u4ea4\u63db\u3002\u7c21\u55ae\u4f86\u8aaa\u662f\u4e00\u500b\u524d\u7aef\u8207\u5f8c\u7aef\u7684\u4e00\u500b\u6e9d\u901a\u4ecb\u9762\u3002 \u53e6\u5916\u5927\u5bb6\u53ef\u80fd\u807d\u904e\u4e00\u500b\u540d\u8a5e\u53eb\u505a RESTful API\u3002\u6240\u8b02\u7684 REST \u70ba Representational State Transfer \u7684\u7e2e\u5beb\u662f\u4e00\u7a2e\u7db2\u8def\u67b6\u69cb\u98a8\u683c\uff0c\u8fd1\u5e7e\u5e74\u4f86 REST \u7684\u6982\u5ff5\u5df2\u7d93\u88ab\u5be6\u4f5c\u5728\u5927\u578b\u7db2\u8def\u7cfb\u7d71\u4e2d\uff0c\u800c\u5728 Web \u670d\u52d9\u4e2d\u4f7f\u7528 REST \u6982\u5ff5\u88ab\u5be6\u4f5c\u51fa\u4f86\u7684 API \u5c31\u7c21\u7a31\u70ba RESTful API \u4ed6\u662f\u4f7f\u7528 HTTP \u7684\u5354\u5b9a\u5b8c\u6574\u5b9a\u7fa9 Web \u670d\u52d9\u5728 HTTP Request \u7684\u5404\u7a2e\u6d41\u7a0b\u3002","title":"\u4ec0\u9ebc\u662f API\uff1f"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#http-request","text":"\u900f\u904e\u7db2\u8def\u5354\u5b9a HTTP Request \u4e0d\u540c\u7684\u65b9\u6cd5\uff0c\u53ef\u4ee5\u5be6\u73fe\u4e0d\u540c\u7684\u8cc7\u6599\u4ea4\u63db\u8acb\u6c42\u65b9\u5f0f\u3002HTTP \u672c\u8eab\u5c31\u662f REST \u7684\u5be6\u4f5c\uff0c\u6240\u8b02\u7684 HTTP Request \u5b9a\u7fa9\u4e86\u516b\u7a2e\u8acb\u6c42\u65b9\u6cd5\u5206\u5225\u70ba\uff1a GET 
\uff1a\u6b64\u65b9\u6cd5\u53ea\u80fd\u5411\u6307\u5b9a\u7684\u8cc7\u6e90\u8981\u6c42\u53d6\u5f97\u8cc7\u6599\uff0c\u4e26\u4e0d\u6703\u66f4\u52d5\u5230\u5167\u90e8\u8cc7\u6e90\u3002 HEAD\uff1aHEAD \u8ddf GET \u65b9\u6cd5\u985e\u4f3c\u53ea\u5dee\u5225\u5728\u5b83\u4e26\u4e0d\u6703\u56de\u50b3\u4f60\u6240\u8acb\u6c42\u7684\u8cc7\u6e90\u5728 body \u4e0a\uff0c\u53ea\u56de\u50b3 HTTP header\u3002 POST \uff1a\u5411\u6307\u5b9a\u7684\u8cc7\u6e90\u63d0\u4ea4\u8cc7\u6599\u3002 PUT \uff1a\u5411\u6307\u5b9a\u8cc7\u6e90\u4f4d\u7f6e\u63d0\u4ea4\u66f4\u65b0\u5167\u5bb9\u3002 DELETE \uff1a\u5411\u6307\u5b9a\u8cc7\u6e90\u4f4d\u7f6e\u8acb\u6c42\u522a\u9664\u5167\u5bb9\u3002 CONNECT\uff1aHTTP/1.1\u5354\u8b70\u4e2d\u9810\u7559\u7d66\u80fd\u5920\u5c07\u9023\u63a5\u6539\u70ba\u7ba1\u9053\u65b9\u5f0f\u7684\u4ee3\u7406\u670d\u52d9\u5668\u3002 OPTIONS\uff1a\u6b64\u65b9\u6cd5\u53ef\u4f7f\u670d\u52d9\u5668\u50b3\u56de\u8a72\u8cc7\u6e90\u6240\u652f\u6301\u7684\u6240\u6709 HTTP \u8acb\u6c42\u65b9\u6cd5\u3002 TRACE\uff1a\u56de\u986f\u670d\u52d9\u5668\u6536\u5230\u7684\u8acb\u6c42\uff0c\u4e3b\u8981\u7528\u65bc\u6e2c\u8a66\u6216\u8a3a\u65b7\u3002","title":"HTTP Request \u65b9\u6cd5"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#api_1","text":"","title":"[\u7a0b\u5f0f\u5be6\u4f5c] \u9cf6\u5c3e\u82b1\u6735\u5206\u985e\u5668 API"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#python-flask-api","text":"Flask \u662f\u4e00\u500b\u4f7f\u7528 Python \u8a9e\u8a00\u7de8\u5beb\u7684\u8f15\u91cf\u7d1a Web \u61c9\u7528\u6846\u67b6\u3002\u5728\u4eca\u65e5\u7684\u6587\u7ae0\u4e2d\u6211\u5011\u5c07\u5ef6\u7e8c\u6628\u5929\u6240\u5132\u5b58\u7684\u9cf6\u5c3e\u82b1\u6735\u5206\u985e\u5668\u6a21\u578b\uff0c\u5efa\u7acb\u4e00\u500b\u82b1\u6735\u5206\u985e\u9810\u6e2c\u7684 API\u3002\u4f7f\u7528\u8005\u53ef\u4ee5\u900f\u904e POST 
\u5354\u5b9a\u5f9e\u524d\u7aef\u7db2\u9801\u767c\u9001\u56db\u500b\u6578\u503c\u5206\u5225\u70ba\u82b1\u843c\u7684\u9577\u8207\u5bec\u4ee5\u53ca\u82b1\u74e3\u7684\u9577\u8207\u5bec\u3002\u5f8c\u7aef\u7a0b\u5f0f\u6536\u5230\u6578\u503c\u5f8c\u9001\u7d66\u4e8b\u5148\u6253\u5305\u597d\u7684\u6a21\u578b\uff0c\u4e26\u5c07\u6a21\u578b\u9810\u6e2c\u7d50\u679c\u900f\u904e JSON \u683c\u5f0f\u56de\u50b3\u5230\u524d\u7aef\u4f7f\u7528\u8005\u3002\u4ee5\u4e0b\u70ba\u7a0b\u5f0f\u6574\u500b\u6a39\u72c0\u7d50\u69cb\uff0c\u5176\u4e2d\u5728\u6700\u5916\u5c64\u8cc7\u6599\u593e\u6709\u4e09\u500b\u6a94\u6848\u5206\u5225\u6709\u5c07\u6a21\u578b\u5c01\u88dd\u6210\u51fd\u5f0f\u7684 model.py \u8207 Flask \u4e3b\u7a0b\u5f0f run.py \u4ee5\u53ca\u8ca0\u8cac\u7ba1\u7406\u5c08\u6848\u5957\u4ef6\u7684 requirements.txt \u3002\u53e6\u5916\u5728 model \u8cc7\u6599\u593e\u4e2d\u8ca0\u8cac\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b\u58d3\u7e2e\u6a94\u3002 . \u251c\u2500\u2500 model \u2502 \u2514\u2500\u2500 xgboost-iris.pgz \u251c\u2500\u2500 model.py \u251c\u2500\u2500 requirements.txt \u2514\u2500\u2500 run.py","title":"\u5efa\u7acb Python Flask API"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#modelpy","text":"\u9996\u5148\u5efa\u7acb\u4e00\u500b model.py \u6a94\u6848\uff0c\u5728\u9019\u500b\u6a94\u6848\u4e2d\u6211\u5011\u8981\u8f09\u5165\u4e8b\u5148\u8a13\u7df4\u597d\u7684\u6a21\u578b\u4e26\u5c07\u5b83\u5c01\u88dd\u6210\u4e00\u500b function \u6216\u662f class\u3002\u5728\u672c\u7bc4\u4f8b\u7a0b\u5f0f\u4e2d\u6211\u5011\u662f\u5efa\u7acb\u4e00\u500b predict() \u7684\u51fd\u5f0f\u4e26\u4e14\u5141\u8a31\u63a5\u6536\u4e00\u500b Numpy \u7684\u9663\u5217\uff0c\u5176\u4e2d\u88e1\u9762\u5141\u8a31\u593e\u5e36\u56db\u500b\u82b1\u6735\u7279\u5fb5\u7684\u6578\u503c\u3002\u6700\u5f8c\u5c07\u6a21\u578b\u9810\u6e2c\u7d50\u679c\u5b58\u653e\u5728 pred \u8b8a\u6578\u4e2d\uff0c\u4e26\u5c07\u9810\u6e2c\u7684\u985e\u5225\u56de\u50b3\u3002 # -*- coding: UTF-8 -*- import pickle import gzip # 
\u8f09\u5165\u6a21\u578b with gzip . open ( './model/xgboost-iris.pgz' , 'rb' ) as f : xgboostModel = pickle . load ( f ) # \u5c07\u6a21\u578b\u9810\u6e2c\u5beb\u6210\u4e00\u500b function def predict ( input ): pred = xgboostModel . predict ( input )[ 0 ] return pred","title":"\u5c01\u88dd\u9810\u6e2c\u6a21\u578b (model.py)"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#flask-api-runpy","text":"\u63a5\u8457\u6211\u5011\u8981\u900f\u904e Flask \u5efa\u7acb\u4e00\u500b API\uff0c\u9996\u5148\u8981\u8a2d\u5b9a\u958b\u653e\u8de8\u7db2\u57df CORS \u6b0a\u9650\u3002\u6240\u8b02\u7684\u8de8\u4f86\u6e90\u8cc7\u6e90\u5171\u4eab (Cross-Origin Resource Sharing, CORS) \u662f\u4e00\u7a2e\u4f7f\u7528\u984d\u5916 HTTP \u6a19\u982d\u4f86\u8b93\u76ee\u524d\u700f\u89bd\u7db2\u7ad9\u7684\u4f7f\u7528\u8005\u80fd\u8a2a\u554f\u4e0d\u540c\u4f86\u6e90\u7db2\u57df\u7684\u4f3a\u670d\u5668\u3002\u7576\u4f7f\u7528\u8005\u8acb\u6c42\u4e00\u500b\u4f86\u81ea\u65bc\u4e0d\u540c\u7db2\u57df\u3001\u901a\u8a0a\u5354\u5b9a\u6216\u901a\u8a0a\u57e0\u7684\u8cc7\u6e90\u6642\uff0c\u6703\u5efa\u7acb\u4e00\u500b\u8de8\u4f86\u6e90 HTTP \u8acb\u6c42\u3002\u6240\u4ee5\u5728\u64b0\u5beb\u7a0b\u5f0f\u7684\u6642\u5019\u5fc5\u9808\u900f\u904e flask_cors \u88e1\u9762\u6240\u63d0\u4f9b\u7684 CORS \u6dfb\u52a0\u8de8\u4f86\u6e90\u8cc7\u6e90\u5171\u4eab\u3002\u9019\u6a23\u524d\u7aef\u4f7f\u7528\u8005\u5728\u4e0d\u540c\u7db2\u57df\u5229\u7528 ajax \u6216 fetch \u5b58\u53d6 API \u6642\u5c31\u6703\u6709\u8b80\u53d6\u6b0a\u9650\u3002 # -*- coding: UTF-8 -*- import numpy as np import model from flask import Flask , request , jsonify from flask_cors import CORS app = Flask ( __name__ ) CORS ( app ) \u63a5\u8457\u6211\u5011\u5148\u793a\u7bc4\u5efa\u7acb\u4e00\u500b GET \u7684\u8def\u7531 @app.route('/') \uff0c\u55ae\u5f15\u865f\u5167\u7684\u5167\u5bb9\u5373\u4ee3\u8868\u4f7f\u7528\u8005\u5728\u547c\u53eb API \u7684\u8def\u5f91\u4f4d\u7f6e / \u4ee3\u8868\u662f root 
\u7684\u610f\u601d\u3002\u5728\u9019\u4e00\u500b\u6e2c\u8a66\u7684\u8def\u7531\u4e2d\u6211\u5011\u76f4\u63a5\u56de\u50b3\u4e00\u500b hello!! \u7684\u5b57\u4e32\u3002\u7a0d\u5f8c\u5c07\u6703\u6559\u5404\u4f4d\u5982\u4f55\u900f\u904e Postman \u9019\u500b\u8edf\u9ad4\u4f86\u6e2c\u8a66 API\u3002\u53e6\u5916\u7b2c\u4e8c\u500b\u8def\u7531\u662f\u8ca0\u8cac\u63a5\u6536\u82b1\u6735\u56db\u500b\u6578\u503c\uff0c\u4e26\u5c07\u9019\u56db\u500b\u6578\u503c\u653e\u5230 Numpy \u9663\u5217\u4e2d\u9001\u5230\u7a0d\u65e9\u4ee5\u5c01\u88dd\u597d\u7684 mpdel.py \u4e2d\u7684 predict() \u65b9\u6cd5\u3002\u4e26\u5c07\u9810\u6e2c\u7684\u7d50\u679c\u900f\u904e JSON \u683c\u5f0f\u56de\u61c9\u7d66\u524d\u7aef\u4f7f\u7528\u8005\u3002\u503c\u5f97\u6ce8\u610f\u7684\u662f\u9019\u4e00\u500b\u8def\u7531\u6211\u5011\u662f\u8a2d\u5b9a\u4ed6\u7684\u8def\u5f91\u70ba /predict \u4ee5\u53ca HTTP Requesst \u7684\u65b9\u6cd5\u6307\u5b9a methods=['POST'] \u3002 @app . route ( '/' ) def index (): return 'hello!!' @app . route ( '/predict' , methods = [ 'POST' ]) def postInput (): # \u53d6\u5f97\u524d\u7aef\u50b3\u904e\u4f86\u7684\u6578\u503c insertValues = request . get_json () x1 = insertValues [ 'sepalLengthCm' ] x2 = insertValues [ 'sepalWidthCm' ] x3 = insertValues [ 'petalLengthCm' ] x4 = insertValues [ 'petalWidthCm' ] input = np . array ([[ x1 , x2 , x3 , x4 ]]) result = model . 
predict ( input ) return jsonify ({ 'return' : str ( result )}) \u6700\u5f8c\u6211\u5011\u900f\u904e app.run() \u5c07\u6b64 API \u90e8\u7f72\u5728\u4f3a\u670d\u5668\u7684 3000 PORT \u7576\u4e2d\u3002 host='0.0.0.0' \u8868\u793a\u9810\u8a2d\u8def\u7531\u5c07\u6703\u81ea\u52d5\u5e6b\u4f60\u53d6\u5f97\u76ee\u524d\u4f3a\u670d\u5668\u7684\u56fa\u5b9a IP \u4f4d\u7f6e\u3002\u7531\u65bc\u6211\u5011\u76ee\u524d\u5728\u672c\u6a5f\u958b\u767c\u7b49\u7b49\u6e2c\u8a66\u6642\u53ef\u4ee5\u76f4\u63a5\u4f7f\u7528 http://localhost:3000 \u9032\u884c\u6e2c\u8a66\u3002\u53e6\u5916\u53c3\u6578 debug \u8a2d\u5b9a\u70ba True \u5373\u8868\u793a API \u88ab\u555f\u52d5\u6642\u6703\u81ea\u52d5\u76e3\u807d\u7a0b\u5f0f\u662f\u5426\u6709\u8b8a\u52d5\uff0c\u5982\u679c\u6709\u66f4\u65b0\u5167\u5bb9\u5373\u6703\u7acb\u523b\u91cd\u65b0\u555f\u52d5 API\u3002\u6b64\u8a2d\u5b9a\u9069\u5408\u5728\u958b\u767c\u6642\u5019\u4f7f\u7528\uff0c\u800c\u771f\u6b63\u4e0a\u7dda\u6642\u518d\u8abf\u6210 False\u3002 if __name__ == '__main__' : app . 
run ( host = '0.0.0.0' , port = 3000 , debug = True )","title":"\u5efa\u7acb Flask API (run.py)"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#requirementstxt","text":"requirements.txt \u9019\u4e00\u652f\u6a94\u6848\u662f\u8ca0\u8cac\u8a18\u9304\u4e86\u7576\u524d\u5c08\u6848\u8cc7\u6599\u593e\u4e0b\u7a0b\u5f0f\u6240\u6709\u4f9d\u8cf4\u7684\u5957\u4ef6\u53ca\u76f8\u5c0d\u61c9\u7684\u7248\u672c\u3002\u4e0b\u5217\u4e94\u500b\u662f\u5728\u672c\u5be6\u4f5c\u4e2d\u5c07\u6703\u4f7f\u7528\u5230\u7684\u5957\u4ef6\uff0c\u82e5\u5957\u4ef6\u5f8c\u9762\u6c92\u6709\u7279\u5225\u6307\u5b9a\u7248\u672c\u865f\uff0c\u5b89\u88dd\u6642\u5c07\u6703\u81ea\u52d5\u5b89\u88dd\u6700\u65b0\u7684\u7248\u672c\u3002 Flask Flask-Cors numpy scikit-learn xgboost \u5047\u8a2d\u7a0b\u5f0f\u5728\u53e6\u4e00\u53f0\u96fb\u8166\u4e0a\u57f7\u884c\u6642\uff0c\u8981\u4e00\u500b\u4e00\u500b\u5b89\u88dd\u5957\u4ef6\u5f88\u9ebb\u7169\u3002\u56e0\u6b64\u53ef\u4ee5\u76f4\u63a5\u900f\u904e requirements.txt \u8a18\u9304\u5c08\u6848\u4e2d\u4f9d\u8cf4\u7684\u5957\u4ef6\u3002\u4e26\u4e14\u8f38\u5165\u4ee5\u4e0b\u6307\u4ee4\u5373\u53ef\u4e00\u6b21\u5b89\u88dd\u6240\u6709\u6307\u5b9a\u7684\u5957\u4ef6\u3002 pip install -r requirements.txt","title":"\u7ba1\u7406\u5957\u4ef6\u7248\u672c (requirements.txt)"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#api_2","text":"\u5728\u672c\u6a5f\u6216\u958b\u767c\u74b0\u5883\u4e2d\u6e2c\u8a66\u57f7\u884c API \u7684\u65b9\u5f0f\u5f88\u7c21\u55ae\u3002\u53ea\u8981\u958b\u555f\u7d42\u7aef\u6a5f\u4e26\u8f38\u5165\u4ee5\u4e0b\u6307\u4ee4\u5373\u53ef\uff1a python run.py \u7a0b\u5f0f\u771f\u6b63\u4e0a\u7dda\u6642\u5efa\u8b70\u4f7f\u7528 gunicorn \u6216 Waitress \u4f86\u7522\u751f WSGI \u670d\u52d9\uff0c\u4e26\u65bc\u80cc\u666f\u904b\u884c \u9084\u8a18\u5f97\u6211\u5011\u6709\u5beb\u4e00\u500b GET 
\u65b9\u6cd5\u7684\u6e2c\u8a66\u8def\u7531\u55ce\uff1f\u9019\u6642\u5019\u5927\u5bb6\u53ef\u4ee5\u958b\u555f\u96fb\u8166\u4e2d\u7684\u700f\u89bd\u5668\u4e26\u5728\u7db2\u5740\u5217\u8f38\u5165 http://localhost:3000 \u5373\u53ef\u7acb\u5373\u770b\u5230 API \u5728\u6307\u5b9a\u7684\u8def\u5f91\u4e0b\u6240\u56de\u61c9\u7684\u5167\u5bb9\u3002\u5982\u679c\u51fa\u73fe\u4ee5\u4e0b\u756b\u9762\u5373\u4ee3\u8868 API \u5df2\u7d93\u6b63\u5e38\u7684\u88ab\u904b\u884c\u56c9\u3002 \u90a3\u4f60\u53ef\u80fd\u6703\u554f\u6211\u8a72\u600e\u9ebc\u6e2c\u8a66\u53e6\u4e00\u500b POST \u65b9\u6cd5\u5462\uff1f\u7531\u65bc GET \u65b9\u6cd5\u6bd4\u8f03\u597d\u8655\u7406\uff0c\u6211\u5011\u76f4\u63a5\u5728\u700f\u89bd\u5668\u8f38\u5165\u8def\u5f91\u5c31\u80fd\u7acb\u5373\u89c0\u770b\u7d50\u679c\u3002\u90a3\u7576\u6211\u5011\u8981\u6e2c\u8a66 POST\u3001PUT\u3001DELETE \u7b49\u65b9\u6cd5\u6642\u5c31\u5fc5\u9808\u4f9d\u9760\u7b2c\u4e09\u65b9\u8edf\u9ad4 Postman \u4f86\u5354\u52a9\u6a21\u64ec HTTP Request \u5b8c\u6210 API \u6e2c\u8a66\u3002","title":"\u57f7\u884c API"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#api-postman","text":"\u7576\u4f60\u5beb\u597d\u4e00\u652f API \u6642\u8981\u99ac\u4e0a\u6e2c\u8a66\u770b\u770b\u4f60\u5beb\u7684\u7a0b\u5f0f\u908f\u8f2f\u662f\u5426\u6b63\u78ba\uff0c\u5c31\u53ef\u4ee5\u4f7f\u7528 Postman \u9019\u500b\u8edf\u9ad4\u4f86\u505a API \u6e2c\u8a66\u3002Postman \u4ed6\u662f\u4e00\u500b\u80fd\u5920\u6a21\u64ec HTTP Request \u7684\u5de5\u5177\u80fd\u5920\u8b93\u4f60\u7c21\u55ae\u5feb\u901f\u7684\u6e2c\u8a66\u4f60\u7684 API\uff0c\u4e26\u4e14\u5167\u5efa\u5305\u542b\u8a31\u591a HTTP \u7684\u8acb\u6c42\u65b9\u5f0f\uff0c\u4f8b\u5982\u5e38\u898b\u7684 GET(\u53d6\u5f97)\u3001POST(\u65b0\u589e)\u3001PUT(\u4fee\u6539)\u3001DELETE(\u522a\u9664)\u3002\u9996\u5148\u5927\u5bb6\u53ef\u4ee5\u5230 \u5b98\u7db2 \u4e0b\u8f09\u8207\u5b89\u88dd\u3002 \u5b89\u88dd\u597d\u4e4b\u5f8c\u53ef\u4ee5\u6253\u958b\u7a0b\u5f0f\u4e26\u9ede\u9078 POST \u4e26\u8cbc\u4e0a API 
\u7db2\u5740\u3002\u7531\u65bc\u6211\u5011\u73fe\u5728\u8981\u6e2c\u8a66\u53e6\u4e00\u500b\u9810\u6e2c\u7684\u8def\u5f91 predict \uff0c\u56e0\u6b64\u5728\u7db2\u5740\u5217\u8cbc\u4e0a http://localhost:3000/predict \u3002\u9ede\u9078 Body-> raw -> JSON \u4e26\u5c07\u82b1\u6735\u7684\u56db\u500b\u53c3\u6578\u4ee5 JSON \u683c\u5f0f\u9032\u884c\u63cf\u8ff0\u3002 { \"sepalLengthCm\" : 5.9 , \"sepalWidthCm\" : 3 , \"petalLengthCm\" : 5.1 , \"petalWidthCm\" : 1.8 } \u9ede\u9078 send \u5f8c\u5373\u53ef\u5c07\u6a21\u64ec\u7684\u56db\u500b\u6578\u503c\u900f\u904e JSON \u683c\u5f0f\u4f7f\u7528 POST \u65b9\u6cd5\u50b3\u9001\u5230\u5f8c\u7aef API \u4e2d\u7684 predict \u8def\u5f91\u3002\u8a72 API \u900f\u904e POST \u63a5\u6536\u5230\u524d\u7aef\u4f7f\u7528\u8005\u6240\u767c\u9001\u7684\u8a0a\u606f\u5f8c\uff0c\u89e3\u6790\u9019\u56db\u500b\u6578\u503c\u4e26\u4f9d\u5e8f\u653e\u5728\u9663\u5217\u4e2d\u4e26\u9032\u884c\u6a21\u578b\u9810\u6e2c\u3002\u6700\u7d42\u9810\u6e2c\u7d50\u679c\u6703\u5c07\u82b1\u7684\u7a2e\u985e\u4ee5 JSON \u683c\u5f0f\u56de\u50b3\u5230\u524d\u7aef\u4f7f\u7528\u8005\u3002\u6b64\u6642\u524d\u7aef\u7684\u7db2\u9801\u8a2d\u8a08\u5e2b\u5c31\u53ef\u4ee5\u5c07\u62ff\u5230\u7684\u9810\u6e2c\u7d50\u679c\u9032\u884c\u524d\u7aef\u7684\u756b\u9762\u6e32\u67d3\u8207\u66f4\u65b0\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u6e2c\u8a66 API \u7684\u597d\u5de5\u5177 Postman"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/","text":"[Day 3] \u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce?\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427! \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u63a2\u7d22\u5f0f\u5206\u6790 (EDA) \u804a\u804a\u4f55\u8b02 EDA\uff0c\u70ba\u4f55\u8981\u505a\u6578\u64da\u5206\u6790? 
\u64b0\u5beb\u7b2c\u4e00\u652f EDA \u7a0b\u5f0f \u900f\u904e\u9cf6\u5c3e\u82b1 (iris) \u8cc7\u6599\u96c6\uff0c\u4f86\u67e5\u770b\u8cc7\u6599\u7684\u5206\u4f48\u72c0\u614b \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u63a2\u7d22\u5f0f\u5206\u6790 (EDA) \u63a2\u7d22\u5f0f\u8cc7\u6599\u5206\u6790 (Exploratory Data Analysis, EDA)\uff0c\u4e3b\u8981\u6982\u5ff5\u662f\u5229\u7528\u6578\u64da\u7d71\u8a08\u7684\u65b9\u5f0f\u8996\u89ba\u5316\u8cc7\u6599\u3002\u900f\u904e\u8cc7\u6599\u7684\u63a2\u7d22\u5f0f\u5206\u6790\u53ef\u4ee5\u67e5\u770b\u8cc7\u6599\u96c6\u7576\u4e2d\u6bcf\u500b\u7279\u5fb5\u5f7c\u6b64\u7684\u91cd\u8981\u7a0b\u5ea6\u4ee5\u53ca\u5176\u8cc7\u6599\u5206\u5e03\u72c0\u6cc1\uff0c\u6709\u826f\u597d\u7684\u6578\u64da\u5206\u6790\u7fd2\u6163\u80fd\u5920\u5e6b\u52a9\u4f60\u66f4\u4e86\u89e3\u8cc7\u6599\u96c6\u7684\u7279\u6027\u3002\u53e6\u5916\u505a EDA \u7684\u597d\u8655\u662f\u53ef\u4ee5\u5f9e\u5404\u7a2e\u9762\u5411\u5148\u4e86\u89e3\u8cc7\u6599\u7684\u72c0\u6cc1\uff0c\u4ee5\u5229\u5f8c\u7e8c\u7684\u6a21\u578b\u5206\u6790\u3002 EDA \u5fc5\u8981\u7684\u5957\u4ef6 \u8cc7\u6599\u8655\u7406 \u2013 Pandas, Numpy Pandas \uff1aPython \u8868\u683c\u8cc7\u6599\u8655\u7406\u7684\u91cd\u8981\u5de5\u5177 Numpy \uff1a\u91dd\u5c0d\u591a\u7dad\u9663\u5217\u7684\u5e73\u884c\u904b\u7b97\u9032\u884c\u512a\u5316\u7684\u5f37\u5927\u51fd\u5f0f\u5eab \u7e6a\u5716\u76f8\u95dc \u2013 Matplotlib, Seaborn Matplotlib \uff1aPython \u6700\u5e38\u88ab\u4f7f\u7528\u5230\u7684\u7e6a\u5716\u5957\u4ef6 Seaborn \uff1a\u4ee5 matplotlib \u70ba\u5e95\u5c64\u7684\u9ad8\u968e\u7e6a\u5716\u5957\u4ef6 \u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u4e00\u89bd \u6b64\u8cc7\u6599\u96c6\u7e3d\u5171\u67094\u500b\u8f38\u5165\u7279\u5fb5\u3002\u5206\u5225\u70ba\u82b1\u843c\u9577\u5ea6\u3001\u82b1\u843c\u5bec\u5ea6\u3001\u82b1\u74e3\u9577\u5ea6\u8207\u82b1\u74e3\u5bec\u5ea6\u3002\u8f38\u51fa\u7279\u5fb5\u70ba\u82b1\u6735\u7684\u54c1\u7a2e\uff0c\u5171\u6709\u4e09\u7a2e\u985e\u5225\u5206\u5225\u70ba 0: iris setosa\u3001 1: iris 
versicolor\u3001 2: iris virginica\u3002 \u8f09\u5165\u5fc5\u8981\u5957\u4ef6 \u9996\u5148\u6211\u5011\u8f09\u5165\u8cc7\u6599\u63a2\u7d22\u5f0f\u5206\u6790\u6240\u9700\u7684\u5957\u4ef6\u3002\u5206\u5225\u6709\u9032\u884c\u6578\u64da\u8655\u7406\u7684\u51fd\u5f0f\u5eab\u7684 pandas \u3001\u9ad8\u968e\u5927\u91cf\u7684\u7dad\u5ea6\u9663\u5217\u8207\u77e9\u9663\u904b\u7b97\u7684 numpy \u3001\u8655\u7406\u8cc7\u6599\u8996\u89ba\u5316\u7684\u7e6a\u5716\u5eab matplotlib \u8207 seaborn \u3002\u6700\u5f8c\u4e00\u500b\u662f\u8cc7\u6599\u96c6\u4f86\u6e90\uff0c\u6b64\u7cfb\u5217\u7bc4\u4f8b\u6211\u5011\u63a1\u7528 Sklearn \u6240\u63d0\u4f9b\u7684\u9cf6\u5c3e\u82b1\u5206\u985e\u7684\u8cc7\u6599\u96c6\u3002 import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris Sklearn Toy datasets Sklearn \u5957\u4ef6\u4e2d\u63d0\u4f9b\u4e86\u4e03\u500b\u5feb\u901f\u5165\u9580\u7684 Toy datasets \u5f88\u63a8\u85a6\u521d\u5b78\u8005\u53ef\u4ee5\u8f09\u5165\u4f86\u73a9\u73a9\u770b\uff0c\u4e26\u4e14\u7df4\u7fd2\u505a\u8cc7\u6599\u63a2\u7d22\u8207\u5efa\u6a21\u3002\u6bcf\u4e00\u500b\u8cc7\u6599\u96c6\u547c\u53eb\u7684\u65b9\u6cd5\u975e\u5e38\u7c21\u55ae\u3002\u4ee5\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u70ba\u4f8b\uff0c\u6211\u5011\u53ef\u4ee5\u900f\u904e API \u53d6\u5f97\u8f38\u5165\u8207\u8f38\u51fa\u3002 from sklearn.datasets import load_iris iris = load_iris () # \u8f38\u5165\u7279\u5fb5 X = iris . data # \u8f38\u51fa\u7279\u5fb5 y = iris . 
target Sklearn \u63d0\u4f9b\u4e86\u8a31\u591a API \u65b9\u6cd5\u53ef\u4ee5\u547c\u53eb\uff1a data: \u53d6\u5f97\u8f38\u5165\u7279\u5fb5 target: \u53d6\u5f97\u8f38\u51fa\u7279\u5fb5 feature_names: \u53d6\u5f97\u8f38\u5165\u7279\u5fb5\u7684\u540d\u7a31 target_names: \u53d6\u5f97\u8f38\u51fa\u7684\u985e\u5225\u6a19\u7c64(\u5206\u985e\u8cc7\u6599\u96c6) DESCR: \u8cc7\u6599\u96c6\u8a73\u7d30\u63cf\u8ff0 \u5982\u679c\u60f3\u8a66\u8a66\u5176\u4ed6\u7684\u8cc7\u6599\u96c6\u53ef\u4ee5\u53c3\u8003\uff1a \u8ff4\u6b78\u554f\u984c load_boston \u6ce2\u58eb\u9813\u623f\u50f9\u9810\u6e2c load_diabetes \u7cd6\u5c3f\u75c5\u9810\u6e2c load_linnerud \u9ad4\u80fd\u8a55\u4f30\u9810\u6e2c \u5206\u985e\u554f\u984c load_iris \u9cf6\u5c3e\u82b1\u7a2e\u985e\u9810\u6e2c load_digits \u624b\u5beb\u6578\u5b57\u8fa8\u8b58 load_wine \u8461\u8404\u9152\u7a2e\u985e\u9810\u6e2c load_breast_cancer \u4e73\u764c\u9810\u6e2c \u53c3\u8003 \u8f09\u5165\u8cc7\u6599\u96c6 \u9996\u5148\u6211\u5011\u8f09\u5165\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u3002\u70ba\u4e86\u65b9\u4fbf\u5206\u6790\u6211\u5011\u5c07 numpy \u683c\u5f0f\u7684\u8cc7\u6599\u8f49\u63db\u6210 DataFrame \u7684\u683c\u5f0f\u9032\u884c\u8cc7\u6599\u63a2\u7d22\u3002\u56e0\u70ba\u900f\u904e Pandas \u7684 DataFrame \u683c\u5f0f\u6211\u5011\u66f4\u80fd\u7528\u8868\u683c\u7684\u5f62\u5f0f\u89c0\u5bdf\u8cc7\u6599\u3002 iris = load_iris () df_data = pd . DataFrame ( data = np . 
c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data \u76f4\u65b9\u5716 \u76f4\u65b9\u5716\u662f\u4e00\u7a2e\u5c0d\u6578\u64da\u5206\u5e03\u60c5\u6cc1\u7684\u5716\u5f62\u8868\u793a\uff0c\u662f\u4e00\u7a2e\u4e8c\u7dad\u7d71\u8a08\u5716\u8868\u3002\u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb Pandas \u5167\u5efa\u51fd\u5f0f hist() \u9032\u884c\u76f4\u65b9\u5716\u5206\u6790\u3002\u5176\u4e2d\u6211\u5011\u53ef\u4ee5\u8a2d\u5b9a bins(\u7bb1\u6578)\uff0c\u9810\u8a2d\u503c\u70ba 10\u3002\u5982\u679c\u8a2d\u5b9a\u7684\u6578\u91cf\u8d8a\u5927\uff0c\u5176\u4ee3\u8868\u9700\u8981\u5206\u5272\u7684\u7cbe\u5ea6\u8d8a\u7d30\u3002\u901a\u5e38\u53d6\u4e00\u500b\u9069\u7576\u7684\u7bb1\u6578\u5373\u53ef\u89c0\u5bdf\u8a72\u7279\u5fb5\u5728\u8cc7\u6599\u96c6\u4e2d\u7684\u5206\u4f48\u60c5\u6cc1\u3002\u85c9\u7531\u76f4\u65b9\u5716\u6211\u5011\u53ef\u4ee5\u77e5\u9053\u6bcf\u500b\u503c\u57df\u7684\u5206\u4f48\u5927\u5c0f\u8207\u6578\u91cf\u3002\u6211\u5011\u4e5f\u80fd\u767c\u73fe\u8f38\u51fa\u9805\u7684\u985e\u5225\u5171\u6709\u4e09\u500b\uff0c\u4e26\u4e14\u9019\u4e09\u500b\u985e\u5225\u7684\u6578\u91cf\u90fd\u525b\u597d\u5404\u6709 50 \u7b46\u8cc7\u6599\u3002\u6211\u5011\u4e5f\u80fd\u5f97\u77e5\u9019\u4e00\u4efd\u8cc7\u6599\u96c6\u7684\u8f38\u51fa\u985e\u5225\u662f\u4e00\u500b\u975e\u5e38\u5747\u52fb\u7684\u8cc7\u6599\u3002 #\u76f4\u65b9\u5716 histograms df_data . hist ( alpha = 0.6 , layout = ( 3 , 3 ), figsize = ( 12 , 8 ), bins = 10 ) plt . tight_layout () plt . show () \u6211\u5011\u4e5f\u53ef\u4ee5\u900f\u904e Seaborn \u7684 histplot \u505a\u51fa\u66f4\u8a73\u7d30\u7684\u76f4\u65b9\u5716\u5206\u6790\u3002\u4e26\u5229\u7528\u6838\u5bc6\u5ea6\u4f30\u8a08 kde=True \u4f86\u67e5\u770b\u6bcf\u500b\u7279\u5fb5\u7684\u5206\u4f48\u72c0\u6cc1\u3002 fig , axes = plt . subplots ( nrows = 1 , ncols = 4 ) fig . set_size_inches ( 15 , 4 ) sns . 
histplot ( df_data [ \"SepalLengthCm\" ][:], ax = axes [ 0 ], kde = True ) sns . histplot ( df_data [ \"SepalWidthCm\" ][:], ax = axes [ 1 ], kde = True ) sns . histplot ( df_data [ \"PetalLengthCm\" ][:], ax = axes [ 2 ], kde = True ) sns . histplot ( df_data [ \"PetalWidthCm\" ][:], ax = axes [ 3 ], kde = True ) \u6838\u5bc6\u5ea6\u4f30\u8a08 \u6838\u5bc6\u5ea6\u4f30\u8a08\u5206\u7232\u5169\u90e8\u5206\uff0c\u5206\u5225\u6709\u5c0d\u89d2\u7dda\u90e8\u5206\u548c\u975e\u5c0d\u89d2\u7dda\u90e8\u5206\u3002\u5728\u5c0d\u89d2\u7dda\u90e8\u5206\u662f\u4ee5\u6838\u5bc6\u5ea6\u4f30\u8a08\u5716\uff08Kernel Density Estimation\uff09\u7684\u65b9\u5f0f\u5448\u73fe\uff0c\u4e5f\u5c31\u662f\u7528\u4f86\u770b\u67d0\u4e00\u500b\u7279\u5fb5\u7684\u5206\u4f48\u60c5\u6cc1\uff0cx\u8ef8\u5c0d\u61c9\u8457\u8a72\u7279\u5fb5\u7684\u6578\u503c\uff0cy\u8ef8\u5c0d\u61c9\u8457\u8a72\u7279\u5fb5\u7684\u5bc6\u5ea6\u4e5f\u5c31\u662f\u7279\u5fb5\u51fa\u73fe\u7684\u983b\u7387\u3002\u5728\u975e\u5c0d\u89d2\u7dda\u7684\u90e8\u5206\u70ba\u5169\u500b\u7279\u5fb5\u4e4b\u9593\u5206\u4f48\u7684\u95dc\u806f\u6563\u9ede\u5716\u3002\u5c07\u4efb\u610f\u5169\u500b\u7279\u5fb5\u9032\u884c\u914d\u5c0d\uff0c\u4ee5\u5176\u4e2d\u4e00\u500b\u7232\u6a6b\u5ea7\u6a19\uff0c\u53e6\u4e00\u500b\u7232\u7e31\u5ea7\u6a19\uff0c\u5c07\u6240\u6709\u7684\u6578\u64da\u9ede\u7e6a\u88fd\u5728\u5716\u4e0a\uff0c\u7528\u4f86\u8861\u91cf\u5169\u500b\u8b8a\u91cf\u7684\u95dc\u806f\u7a0b\u5ea6\u3002 \u4f7f\u7528 Pandas \u7e6a\u88fd\uff1a from pandas.plotting import scatter_matrix scatter_matrix ( df_data , figsize = ( 10 , 10 ), color = 'b' , diagonal = 'kde' ) \u4f7f\u7528 Seaborn \u7e6a\u88fd\uff1a sns . 
pairplot ( df_data , hue = \"Species\" , height = 2 , diag_kind = \"kde\" ) \u95dc\u806f\u5206\u6790 \u900f\u904e pandas \u7684 corr() \u51fd\u5f0f\u53ef\u4ee5\u5feb\u901f\u7684\u8a08\u7b97\u6bcf\u500b\u7279\u5fb5\u9593\u7684\u5f7c\u6b64\u95dc\u806f\u7a0b\u5ea6\u3002\u5176\u5340\u9593\u503c\u70ba-1~1\u4e4b\u9593\uff0c\u6578\u5b57\u8d8a\u5927\u4ee3\u8868\u95dc\u806f\u7a0b\u5ea6\u6b63\u76f8\u95dc\u8d8a\u9ad8\u3002\u76f8\u53cd\u7684\u7576\u8ca0\u7684\u7a0b\u5ea6\u5f88\u9ad8\u6211\u5011\u53ef\u4ee5\u89e3\u91cb\u9019\u5169\u500b\u7279\u5fb5\u4e4b\u9593\u662f\u6709\u5f88\u9ad8\u7684\u8ca0 \u95dc\u806f\u6027\u3002 # correlation \u8a08\u7b97 corr = df_data [[ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]] . corr () plt . figure ( figsize = ( 8 , 8 )) sns . heatmap ( corr , square = True , annot = True , cmap = \"RdBu_r\" ) \u6563\u4f48\u5716 \u900f\u904e\u6563\u4f48\u5716\u6211\u5011\u53ef\u4ee5\u5f9e\u4e8c\u7dad\u7684\u5e73\u9762\u4e0a\u89c0\u5bdf\u5169\u5169\u7279\u5fb5\u9593\u5f7c\u6b64\u7684\u5206\u4f48\u72c0\u6cc1\u3002\u5982\u679c\u8a72\u7279\u5fb5\u91cd\u8981\u7a0b\u5ea6\u8d8a\u9ad8\uff0c\u7fa4\u805a\u7684\u6548\u679c\u6703\u66f4\u52a0\u986f\u8457\u3002 sns . lmplot ( \"SepalLengthCm\" , \"SepalWidthCm\" , hue = 'Species' , data = df_data , fit_reg = False , legend = False ) plt . 
legend ( title = 'Species' , loc = 'upper right' , labels = [ 'Iris-Setosa' , 'Iris-Versicolour' , 'Iris-Virginica' ]) \u7bb1\u5f62\u5716 \u900f\u904e\u7bb1\u5f62\u5716\u53ef\u4ee5\u5206\u6790\u6bcf\u500b\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u6cc1\u4ee5\u53ca\u662f\u5426\u6709\u96e2\u7fa4\u503c\u3002\u6211\u5011\u5229\u7528\u7bb1\u5f62\u5716\u4f86\u8868\u793a\u56db\u5206\u4f4d\u6578\u4f86\u89c0\u5bdf\u6578\u64da\u5206\u6563\u60c5\u6cc1\u3002\u7bb1\u5f62\u7684\u5169\u7aef\u70ba\u7b2c\u4e00\u500b\u56db\u5206\u4f4d\u6578\u6db5\u84cb25%\u4e4b\u8cc7\u6599(Q1)\u8207\u7b2c\u4e09\u500b\u56db\u5206\u4f4d\u6578\u6db5\u84cb75%\u4e4b\u8cc7\u6599(Q3)\uff0c\u800c\u7bb1\u5f62\u5716\u7684\u4e2d\u9593\u7dda\u70ba\u4e2d\u4f4d\u6578\u986f\u793a\u6db5\u84cb\u524d50%\u8cc7\u6599\u4e4b\u4f4d\u7f6e\u3002\u7bb1\u5f62\u4e0a\u865b\u7dda\u7684\u7aef\u9ede\u70ba\u6975\u5927\u503c\uff0c\u7bb1\u578b\u4e0b\u865b\u7dda\u7684\u9ede\u70ba\u6975\u5c0f\u503c\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 3] \u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce?\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427!"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#day-3","text":"","title":"[Day 3] \u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce?\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427!"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#_1","text":"\u63a2\u7d22\u5f0f\u5206\u6790 (EDA) \u804a\u804a\u4f55\u8b02 EDA\uff0c\u70ba\u4f55\u8981\u505a\u6578\u64da\u5206\u6790? 
\u64b0\u5beb\u7b2c\u4e00\u652f EDA \u7a0b\u5f0f \u900f\u904e\u9cf6\u5c3e\u82b1 (iris) \u8cc7\u6599\u96c6\uff0c\u4f86\u67e5\u770b\u8cc7\u6599\u7684\u5206\u4f48\u72c0\u614b \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#eda","text":"\u63a2\u7d22\u5f0f\u8cc7\u6599\u5206\u6790 (Exploratory Data Analysis, EDA)\uff0c\u4e3b\u8981\u6982\u5ff5\u662f\u5229\u7528\u6578\u64da\u7d71\u8a08\u7684\u65b9\u5f0f\u8996\u89ba\u5316\u8cc7\u6599\u3002\u900f\u904e\u8cc7\u6599\u7684\u63a2\u7d22\u5f0f\u5206\u6790\u53ef\u4ee5\u67e5\u770b\u8cc7\u6599\u96c6\u7576\u4e2d\u6bcf\u500b\u7279\u5fb5\u5f7c\u6b64\u7684\u91cd\u8981\u7a0b\u5ea6\u4ee5\u53ca\u5176\u8cc7\u6599\u5206\u5e03\u72c0\u6cc1\uff0c\u6709\u826f\u597d\u7684\u6578\u64da\u5206\u6790\u7fd2\u6163\u80fd\u5920\u5e6b\u52a9\u4f60\u66f4\u4e86\u89e3\u8cc7\u6599\u96c6\u7684\u7279\u6027\u3002\u53e6\u5916\u505a EDA \u7684\u597d\u8655\u662f\u53ef\u4ee5\u5f9e\u5404\u7a2e\u9762\u5411\u5148\u4e86\u89e3\u8cc7\u6599\u7684\u72c0\u6cc1\uff0c\u4ee5\u5229\u5f8c\u7e8c\u7684\u6a21\u578b\u5206\u6790\u3002","title":"\u63a2\u7d22\u5f0f\u5206\u6790 (EDA)"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#eda_1","text":"\u8cc7\u6599\u8655\u7406 \u2013 Pandas, Numpy Pandas \uff1aPython \u8868\u683c\u8cc7\u6599\u8655\u7406\u7684\u91cd\u8981\u5de5\u5177 Numpy \uff1a\u91dd\u5c0d\u591a\u7dad\u9663\u5217\u7684\u5e73\u884c\u904b\u7b97\u9032\u884c\u512a\u5316\u7684\u5f37\u5927\u51fd\u5f0f\u5eab \u7e6a\u5716\u76f8\u95dc \u2013 Matplotlib, Seaborn Matplotlib \uff1aPython \u6700\u5e38\u88ab\u4f7f\u7528\u5230\u7684\u7e6a\u5716\u5957\u4ef6 Seaborn \uff1a\u4ee5 matplotlib \u70ba\u5e95\u5c64\u7684\u9ad8\u968e\u7e6a\u5716\u5957\u4ef6","title":"EDA 
\u5fc5\u8981\u7684\u5957\u4ef6"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#_2","text":"\u6b64\u8cc7\u6599\u96c6\u7e3d\u5171\u67094\u500b\u8f38\u5165\u7279\u5fb5\u3002\u5206\u5225\u70ba\u82b1\u843c\u9577\u5ea6\u3001\u82b1\u843c\u5bec\u5ea6\u3001\u82b1\u74e3\u9577\u5ea6\u8207\u82b1\u74e3\u5bec\u5ea6\u3002\u8f38\u51fa\u7279\u5fb5\u70ba\u82b1\u6735\u7684\u54c1\u7a2e\uff0c\u5171\u6709\u4e09\u7a2e\u985e\u5225\u5206\u5225\u70ba 0: iris setosa\u3001 1: iris versicolor\u3001 2: iris virginica\u3002","title":"\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u4e00\u89bd"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#_3","text":"\u9996\u5148\u6211\u5011\u8f09\u5165\u8cc7\u6599\u63a2\u7d22\u5f0f\u5206\u6790\u6240\u9700\u7684\u5957\u4ef6\u3002\u5206\u5225\u6709\u9032\u884c\u6578\u64da\u8655\u7406\u7684\u51fd\u5f0f\u5eab\u7684 pandas \u3001\u9ad8\u968e\u5927\u91cf\u7684\u7dad\u5ea6\u9663\u5217\u8207\u77e9\u9663\u904b\u7b97\u7684 numpy \u3001\u8655\u7406\u8cc7\u6599\u8996\u89ba\u5316\u7684\u7e6a\u5716\u5eab matplotlib \u8207 seaborn \u3002\u6700\u5f8c\u4e00\u500b\u662f\u8cc7\u6599\u96c6\u4f86\u6e90\uff0c\u6b64\u7cfb\u5217\u7bc4\u4f8b\u6211\u5011\u63a1\u7528 Sklearn \u6240\u63d0\u4f9b\u7684\u9cf6\u5c3e\u82b1\u5206\u985e\u7684\u8cc7\u6599\u96c6\u3002 import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris","title":"\u8f09\u5165\u5fc5\u8981\u5957\u4ef6"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#sklearn-toy-datasets","text":"Sklearn \u5957\u4ef6\u4e2d\u63d0\u4f9b\u4e86\u4e03\u500b\u5feb\u901f\u5165\u9580\u7684 Toy datasets 
\u5f88\u63a8\u85a6\u521d\u5b78\u8005\u53ef\u4ee5\u8f09\u5165\u4f86\u73a9\u73a9\u770b\uff0c\u4e26\u4e14\u7df4\u7fd2\u505a\u8cc7\u6599\u63a2\u7d22\u8207\u5efa\u6a21\u3002\u6bcf\u4e00\u500b\u8cc7\u6599\u96c6\u547c\u53eb\u7684\u65b9\u6cd5\u975e\u5e38\u7c21\u55ae\u3002\u4ee5\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u70ba\u4f8b\uff0c\u6211\u5011\u53ef\u4ee5\u900f\u904e API \u53d6\u5f97\u8f38\u5165\u8207\u8f38\u51fa\u3002 from sklearn.datasets import load_iris iris = load_iris () # \u8f38\u5165\u7279\u5fb5 X = iris . data # \u8f38\u51fa\u7279\u5fb5 y = iris . target Sklearn \u63d0\u4f9b\u4e86\u8a31\u591a API \u65b9\u6cd5\u53ef\u4ee5\u547c\u53eb\uff1a data: \u53d6\u5f97\u8f38\u5165\u7279\u5fb5 target: \u53d6\u5f97\u8f38\u51fa\u7279\u5fb5 feature_names: \u53d6\u5f97\u8f38\u5165\u7279\u5fb5\u7684\u540d\u7a31 target_names: \u53d6\u5f97\u8f38\u51fa\u7684\u985e\u5225\u6a19\u7c64(\u5206\u985e\u8cc7\u6599\u96c6) DESCR: \u8cc7\u6599\u96c6\u8a73\u7d30\u63cf\u8ff0 \u5982\u679c\u60f3\u8a66\u8a66\u5176\u4ed6\u7684\u8cc7\u6599\u96c6\u53ef\u4ee5\u53c3\u8003\uff1a \u8ff4\u6b78\u554f\u984c load_boston \u6ce2\u58eb\u9813\u623f\u50f9\u9810\u6e2c load_diabetes \u7cd6\u5c3f\u75c5\u9810\u6e2c load_linnerud \u9ad4\u80fd\u8a55\u4f30\u9810\u6e2c \u5206\u985e\u554f\u984c load_iris \u9cf6\u5c3e\u82b1\u7a2e\u985e\u9810\u6e2c load_digits \u624b\u5beb\u6578\u5b57\u8fa8\u8b58 load_wine \u8461\u8404\u9152\u7a2e\u985e\u9810\u6e2c load_breast_cancer \u4e73\u764c\u9810\u6e2c \u53c3\u8003","title":"Sklearn Toy datasets"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#_4","text":"\u9996\u5148\u6211\u5011\u8f09\u5165\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u3002\u70ba\u4e86\u65b9\u4fbf\u5206\u6790\u6211\u5011\u5c07 numpy \u683c\u5f0f\u7684\u8cc7\u6599\u8f49\u63db\u6210 DataFrame \u7684\u683c\u5f0f\u9032\u884c\u8cc7\u6599\u63a2\u7d22\u3002\u56e0\u70ba\u900f\u904e Pandas \u7684 DataFrame 
\u683c\u5f0f\u6211\u5011\u66f4\u80fd\u7528\u8868\u683c\u7684\u5f62\u5f0f\u89c0\u5bdf\u8cc7\u6599\u3002 iris = load_iris () df_data = pd . DataFrame ( data = np . c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data","title":"\u8f09\u5165\u8cc7\u6599\u96c6"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#_5","text":"\u76f4\u65b9\u5716\u662f\u4e00\u7a2e\u5c0d\u6578\u64da\u5206\u5e03\u60c5\u6cc1\u7684\u5716\u5f62\u8868\u793a\uff0c\u662f\u4e00\u7a2e\u4e8c\u7dad\u7d71\u8a08\u5716\u8868\u3002\u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb Pandas \u5167\u5efa\u51fd\u5f0f hist() \u9032\u884c\u76f4\u65b9\u5716\u5206\u6790\u3002\u5176\u4e2d\u6211\u5011\u53ef\u4ee5\u8a2d\u5b9a bins(\u7bb1\u6578)\uff0c\u9810\u8a2d\u503c\u70ba 10\u3002\u5982\u679c\u8a2d\u5b9a\u7684\u6578\u91cf\u8d8a\u5927\uff0c\u5176\u4ee3\u8868\u9700\u8981\u5206\u5272\u7684\u7cbe\u5ea6\u8d8a\u7d30\u3002\u901a\u5e38\u53d6\u4e00\u500b\u9069\u7576\u7684\u7bb1\u6578\u5373\u53ef\u89c0\u5bdf\u8a72\u7279\u5fb5\u5728\u8cc7\u6599\u96c6\u4e2d\u7684\u5206\u4f48\u60c5\u6cc1\u3002\u85c9\u7531\u76f4\u65b9\u5716\u6211\u5011\u53ef\u4ee5\u77e5\u9053\u6bcf\u500b\u503c\u57df\u7684\u5206\u4f48\u5927\u5c0f\u8207\u6578\u91cf\u3002\u6211\u5011\u4e5f\u80fd\u767c\u73fe\u8f38\u51fa\u9805\u7684\u985e\u5225\u5171\u6709\u4e09\u500b\uff0c\u4e26\u4e14\u9019\u4e09\u500b\u985e\u5225\u7684\u6578\u91cf\u90fd\u525b\u597d\u5404\u6709 50 \u7b46\u8cc7\u6599\u3002\u6211\u5011\u4e5f\u80fd\u5f97\u77e5\u9019\u4e00\u4efd\u8cc7\u6599\u96c6\u7684\u8f38\u51fa\u985e\u5225\u662f\u4e00\u500b\u975e\u5e38\u5747\u52fb\u7684\u8cc7\u6599\u3002 #\u76f4\u65b9\u5716 histograms df_data . hist ( alpha = 0.6 , layout = ( 3 , 3 ), figsize = ( 12 , 8 ), bins = 10 ) plt . tight_layout () plt . 
show () \u6211\u5011\u4e5f\u53ef\u4ee5\u900f\u904e Seaborn \u7684 histplot \u505a\u51fa\u66f4\u8a73\u7d30\u7684\u76f4\u65b9\u5716\u5206\u6790\u3002\u4e26\u5229\u7528\u6838\u5bc6\u5ea6\u4f30\u8a08 kde=True \u4f86\u67e5\u770b\u6bcf\u500b\u7279\u5fb5\u7684\u5206\u4f48\u72c0\u6cc1\u3002 fig , axes = plt . subplots ( nrows = 1 , ncols = 4 ) fig . set_size_inches ( 15 , 4 ) sns . histplot ( df_data [ \"SepalLengthCm\" ][:], ax = axes [ 0 ], kde = True ) sns . histplot ( df_data [ \"SepalWidthCm\" ][:], ax = axes [ 1 ], kde = True ) sns . histplot ( df_data [ \"PetalLengthCm\" ][:], ax = axes [ 2 ], kde = True ) sns . histplot ( df_data [ \"PetalWidthCm\" ][:], ax = axes [ 3 ], kde = True )","title":"\u76f4\u65b9\u5716"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#_6","text":"\u6838\u5bc6\u5ea6\u4f30\u8a08\u5206\u7232\u5169\u90e8\u5206\uff0c\u5206\u5225\u6709\u5c0d\u89d2\u7dda\u90e8\u5206\u548c\u975e\u5c0d\u89d2\u7dda\u90e8\u5206\u3002\u5728\u5c0d\u89d2\u7dda\u90e8\u5206\u662f\u4ee5\u6838\u5bc6\u5ea6\u4f30\u8a08\u5716\uff08Kernel Density Estimation\uff09\u7684\u65b9\u5f0f\u5448\u73fe\uff0c\u4e5f\u5c31\u662f\u7528\u4f86\u770b\u67d0\u4e00\u500b\u7279\u5fb5\u7684\u5206\u4f48\u60c5\u6cc1\uff0cx\u8ef8\u5c0d\u61c9\u8457\u8a72\u7279\u5fb5\u7684\u6578\u503c\uff0cy\u8ef8\u5c0d\u61c9\u8457\u8a72\u7279\u5fb5\u7684\u5bc6\u5ea6\u4e5f\u5c31\u662f\u7279\u5fb5\u51fa\u73fe\u7684\u983b\u7387\u3002\u5728\u975e\u5c0d\u89d2\u7dda\u7684\u90e8\u5206\u70ba\u5169\u500b\u7279\u5fb5\u4e4b\u9593\u5206\u4f48\u7684\u95dc\u806f\u6563\u9ede\u5716\u3002\u5c07\u4efb\u610f\u5169\u500b\u7279\u5fb5\u9032\u884c\u914d\u5c0d\uff0c\u4ee5\u5176\u4e2d\u4e00\u500b\u7232\u6a6b\u5ea7\u6a19\uff0c\u53e6\u4e00\u500b\u7232\u7e31\u5ea7\u6a19\uff0c\u5c07\u6240\u6709\u7684\u6578\u64da\u9ede\u7e6a\u88fd\u5728\u5716\u4e0a\uff0c\u7528\u4f86\u8861\u91cf\u5169\u500b\u8b8a\u91cf\u7684\u95dc\u806f\u7a0b\u5ea6\u3002 \u4f7f\u7528 Pandas \u7e6a\u88fd\uff1a from 
pandas.plotting import scatter_matrix scatter_matrix ( df_data , figsize = ( 10 , 10 ), color = 'b' , diagonal = 'kde' ) \u4f7f\u7528 Seaborn \u7e6a\u88fd\uff1a sns . pairplot ( df_data , hue = \"Species\" , height = 2 , diag_kind = \"kde\" )","title":"\u6838\u5bc6\u5ea6\u4f30\u8a08"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#_7","text":"\u900f\u904e pandas \u7684 corr() \u51fd\u5f0f\u53ef\u4ee5\u5feb\u901f\u7684\u8a08\u7b97\u6bcf\u500b\u7279\u5fb5\u9593\u7684\u5f7c\u6b64\u95dc\u806f\u7a0b\u5ea6\u3002\u5176\u5340\u9593\u503c\u70ba-1~1\u4e4b\u9593\uff0c\u6578\u5b57\u8d8a\u5927\u4ee3\u8868\u95dc\u806f\u7a0b\u5ea6\u6b63\u76f8\u95dc\u8d8a\u9ad8\u3002\u76f8\u53cd\u7684\u7576\u8ca0\u7684\u7a0b\u5ea6\u5f88\u9ad8\u6211\u5011\u53ef\u4ee5\u89e3\u91cb\u9019\u5169\u500b\u7279\u5fb5\u4e4b\u9593\u662f\u6709\u5f88\u9ad8\u7684\u8ca0 \u95dc\u806f\u6027\u3002 # correlation \u8a08\u7b97 corr = df_data [[ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]] . corr () plt . figure ( figsize = ( 8 , 8 )) sns . heatmap ( corr , square = True , annot = True , cmap = \"RdBu_r\" )","title":"\u95dc\u806f\u5206\u6790"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#_8","text":"\u900f\u904e\u6563\u4f48\u5716\u6211\u5011\u53ef\u4ee5\u5f9e\u4e8c\u7dad\u7684\u5e73\u9762\u4e0a\u89c0\u5bdf\u5169\u5169\u7279\u5fb5\u9593\u5f7c\u6b64\u7684\u5206\u4f48\u72c0\u6cc1\u3002\u5982\u679c\u8a72\u7279\u5fb5\u91cd\u8981\u7a0b\u5ea6\u8d8a\u9ad8\uff0c\u7fa4\u805a\u7684\u6548\u679c\u6703\u66f4\u52a0\u986f\u8457\u3002 sns . lmplot ( \"SepalLengthCm\" , \"SepalWidthCm\" , hue = 'Species' , data = df_data , fit_reg = False , legend = False ) plt . 
legend ( title = 'Species' , loc = 'upper right' , labels = [ 'Iris-Setosa' , 'Iris-Versicolour' , 'Iris-Virginica' ])","title":"\u6563\u4f48\u5716"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#_9","text":"\u900f\u904e\u7bb1\u5f62\u5716\u53ef\u4ee5\u5206\u6790\u6bcf\u500b\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u6cc1\u4ee5\u53ca\u662f\u5426\u6709\u96e2\u7fa4\u503c\u3002\u6211\u5011\u5229\u7528\u7bb1\u5f62\u5716\u4f86\u8868\u793a\u56db\u5206\u4f4d\u6578\u4f86\u89c0\u5bdf\u6578\u64da\u5206\u6563\u60c5\u6cc1\u3002\u7bb1\u5f62\u7684\u5169\u7aef\u70ba\u7b2c\u4e00\u500b\u56db\u5206\u4f4d\u6578\u6db5\u84cb25%\u4e4b\u8cc7\u6599(Q1)\u8207\u7b2c\u4e09\u500b\u56db\u5206\u4f4d\u6578\u6db5\u84cb75%\u4e4b\u8cc7\u6599(Q3)\uff0c\u800c\u7bb1\u5f62\u5716\u7684\u4e2d\u9593\u7dda\u70ba\u4e2d\u4f4d\u6578\u986f\u793a\u6db5\u84cb\u524d50%\u8cc7\u6599\u4e4b\u4f4d\u7f6e\u3002\u7bb1\u5f62\u4e0a\u865b\u7dda\u7684\u7aef\u9ede\u70ba\u6975\u5927\u503c\uff0c\u7bb1\u578b\u4e0b\u865b\u7dda\u7684\u9ede\u70ba\u6975\u5c0f\u503c\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u7bb1\u5f62\u5716"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/","text":"[Day 30] \u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u52d5\u624b\u90e8\u7f72\u81ea\u5df1\u7684\u6a5f\u5668\u5b78\u7fd2 API \u4f7f\u7528 Heroku \u96f2\u7aef\u5e73\u53f0\u90e8\u7f72\u61c9\u7528\u7a0b\u5f0f \u7bc4\u4f8b\u7a0b\u5f0f\uff1a Code \u524d\u8a00 \u958b\u767c\u7684\u6700\u5f8c\u4e00\u54e9\u8def \u90e8\u7f72\u61c9\u7528 \u3002\u90e8\u7f72 API 
\u5fc5\u9808\u5728\u4e00\u500b\u7a69\u5b9a\u7684\u4f3a\u670d\u5668\u4e0a\u904b\u884c\uff0c\u5927\u591a\u6578\u4f01\u696d\u53ef\u80fd\u6703\u79df\u7528\u96f2\u7aef\u7684\u865b\u64ec\u4f3a\u670d\u5668\u3002\u5e38\u898b\u7684\u96f2\u7aef\u5e73\u53f0\u4e09\u5de8\u982d\u6709 Google Cloud Platform (GCP)\u3001Amazon Web Service (AWS) \u4ee5\u53ca Microsoft Azure\u3002\u4ee5\u4e0a\u4e09\u5bb6\u4f9b\u61c9\u5546\u90fd\u6709\u63d0\u4f9b\u514d\u8cbb\u7684\u8a66\u7528\u984d\u5ea6\u4ee5\u53ca\u90e8\u7f72\u7684\u6559\u5b78\uff0c\u53e6\u5916\u96f2\u7aef\u4f3a\u670d\u5668\u8a08\u8cbb\u7684\u65b9\u5f0f\u662f\u63a1\u7528\u591a\u5c11\u4ed8\u591a\u5c11\u7684\u6982\u5ff5\u6536\u8cbb\u3002\u82e5\u6709 GCP \u4f7f\u7528\u9700\u6c42\u53ef\u4ee5\u53c3\u8003\u6211\u904e\u53bb\u6240\u9304\u88fd\u7684\u7cfb\u5217\u6559\u5b78\u5f71\u7247 GCP\u6559\u5b78-Python \u3002 Heroku \u96f2\u7aef\u5e73\u53f0 Heroku \u662f\u4e00\u500b\u652f\u63f4\u591a\u7a2e\u7a0b\u5f0f\u8a9e\u8a00\u7684\u96f2\u5e73\u53f0\u5373\u670d\u52d9\u3002\u4e26\u4e14\u63d0\u4f9b\u4e00\u500b~~\u514d\u8cbb~~(\u73fe\u5728\u8981\u4ed8\u8cbb\u4e86)\u7684\u96f2\u7aef\u670d\u52d9\uff0c\u9019\u500b\u96f2\u7aef\u5e73\u53f0~~\u4e00\u500b\u5e33\u865f\u53ef\u4ee5\u514d\u8cbb\u5efa\u7acb\u4e94\u500b\u5c08\u6848~~\uff0c\u96d6\u7136\u662f~~\u514d\u8cbb\u7576\u7136\u4e5f\u6709\u4f7f\u7528\u4e0a\u7684\u9650\u5236\u3002\u4f8b\u5982\uff1a(1) \u8d85\u904e 30\u5206\u9418 \u9592\u7f6e\u5c07\u6703\u9032\u5165\u7761\u7720\u72c0\u614b\uff0c\u4e4b\u5f8c\u91cd\u65b0\u555f\u52d5 API \u6642\u6703\u9700\u8981\u7b49\u5f85\u4e00\u4e9b\u6642\u9593\u624d\u6709\u56de\u61c9\u3002(2) 500MB \u7684\u5132\u5b58\u7a7a\u9593\u9650\u5236\u3002\u7576\u7136 Heroku \u4e5f\u63d0\u4f9b\u591a\u7a2e\u8a9e\u8a00\u7684\u90e8\u7f72\u74b0\u5883\u50cf\u662f Ruby\u3001Node.js\u3001PHP\u3001Go\u3001Python ...\u7b49\u3002~~ \u672c\u7bc7\u6587\u7ae0\u6703\u6559\u4f60\u5982\u4f55\u90e8\u7f72 Python \u7684 Flask API\u3002 1. 
\u524d\u7f6e\u4f5c\u696d 1.1) \u7bc4\u4f8b\u7a0b\u5f0f\u78bc \u9019\u4e00\u7bc7\u6587\u7ae0\u5c07\u4ee5\u82b1\u6735\u5206\u985e API \u70ba\u4f8b\uff0c\u62ff\u4e00\u500b\u5148\u5df2\u7d93\u8a13\u7df4\u597d\u7684\u6a21\u578b\u9032\u884c Python Flask API \u7684\u958b\u767c\u8207\u90e8\u7f72\u3002\u81f3\u65bc\u6a21\u578b\u7684\u8a13\u7df4\u548c Flask API \u7684\u8a73\u7d30\u5167\u5bb9\u9019\u908a\u5c31\u4e0d\u7d30\u63d0\uff0c\u82e5\u5404\u4f4d\u60f3\u4e86\u89e3\u7684\u53ef\u4ee5\u53c3\u8003\u6628\u5929\u7684\u5167\u5bb9 [Day 29] \u4f7f\u7528 Python Flask \u67b6\u8a2d API \u5427\uff01 \u3002\u53e6\u5916\u5efa\u8b70\u5927\u5bb6\u53ef\u4ee5\u53c3\u8003\u4e0b\u9762\u9019\u4efd\u7a0b\u5f0f\u78bc\u9032\u884c\u4eca\u5929\u7684\u5167\u5bb9\u5be6\u4f5c\uff0c\u4f7f\u7528 GitHub \u4e26\u5c07\u7a0b\u5f0f fork \u5230\u81ea\u5df1\u7684\u5e33\u865f\u4e2d\u3002 \u7bc4\u4f8b\u7a0b\u5f0f\u78bc \u4ee5\u4e0b\u7c21\u55ae\u8aaa\u660e\u5c08\u6848\u5167\u90e8\u7f72 Heroku \u7684\u91cd\u8981\u6a94\u6848\u3002 1.2 Procfile \u8a2d\u5b9a\u6a94 Procfile \u9019\u500b\u6a94\u6848\u662f\u8981\u544a\u8a34 Heroku \u8981\u5982\u4f55\u555f\u52d5\u9019\u500b web app\uff0c\u5728 Heroku \u88e1\uff0c\u57f7\u884c Python \u8981\u4f7f\u7528 Gunicorn \u4f86\u555f\u52d5 web server\u3002\u6240\u4ee5\u5728 requirements.txt \u88e1\uff0c\u8acb\u8a18\u5f97\u8981\u8f38\u5165 gunicorn\u3002Procfile \u6a94\u6848\uff0c\u7684\u5167\u5bb9\u5982\u4e0b\uff1a web gunicorn run:app 2. 
\u90e8\u7f72 Heroku \u5c08\u6848 2.1 \u5728 Heroku \u5efa\u7acb\u61c9\u7528\u7a0b\u5f0f \u5efa\u7acb\u5e33\u865f\u5f8c\u53f3\u4e0a\u89d2\u300c New \u300d\u4e2d\u7684\u300c Create new app \u300d\u5efa\u7acb\u7b2c\u4e00\u500b\u61c9\u7528\u7a0b\u5f0f\uff1a 2.2 \u5c08\u6848\u8207 GitHub \u9023\u52d5 \u9019\u4e00\u6b65\u9a5f\u662f\u5c07 GitHub \u4e0a\u7684\u5c08\u6848\u76f4\u63a5\u8207 Heroku \u505a\u9023\u52d5\uff0c\u4f60\u4e5f\u53ef\u4ee5\u76f4\u63a5 Fork \u9019\u500b\u5c08\u6848\u76f4\u63a5\u5be6\u4f5c\u3002\u6216\u662f\u4f60\u4e5f\u53ef\u4ee5\u900f\u904e Heroku CLI \u76f4\u63a5\u5c07\u672c\u6a5f\u7684\u7a0b\u5f0f\u78bc\u90e8\u7f72\u5230 Heroku \u4e3b\u6a5f\u4e2d\u3002\u90e8\u7f72\u968e\u6bb5\u883b\u5403\u5927\u5bb6 Git \u7248\u63a7\u7684\u80fd\u529b\uff0c\u57fa\u672c\u7684\u6559\u5b78\u9019\u88e1\u5c31\u4e0d\u8d05\u8ff0\uff0c\u60f3\u4e86\u89e3\u66f4\u591a Git \u6280\u5de7\u53ef\u4ee5 \u53c3\u8003 \u3002 \u9ede\u9078 Enable Automatic Deploys \u9023\u52d5\u5f8c\u53ef\u4ee5\u9078\u64c7\u81ea\u52d5\u90e8\u7f72\u3002\u7576\u4f60 GitHub \u5c08\u6848\u7684\u7a0b\u5f0f\u78bc\u6709\u66f4\u65b0\u6642\u4ed6\u6703\u81ea\u52d5\u5e6b\u4f60\u628a\u66f4\u65b0\u7684\u7a0b\u5f0f\u90e8\u7f72\u5230 Heroku \u4e2d\u3002 \u7531\u65bc\u81ea\u52d5\u66f4\u65b0\u8207\u90e8\u7f72\u6703\u6709\u4e0a\u9650\u6b21\u6578\uff0c\u7576\u4f60\u7684\u5c08\u6848\u5728 GitHub \u66f4\u65b0\u6b21\u6578\u592a\u983b\u7e41\u3002Heroku \u5c31\u6703\u505c\u6b62\u81ea\u52d5\u767c\u5e03\uff0c\u9019\u6642\u5019\u4f60\u4e5f\u53ef\u4ee5\u8a66\u8a66\u624b\u52d5\u90e8\u7f72\u3002 \u90e8\u7f72\u5c08\u6848 \u78ba\u8a8d\u4ee5\u4e0b\u4e8b\u60c5\u90fd\u5b8c\u6210\u5f8c\u5c31\u53ef\u4ee5\u90e8\u7f72\u7a0b\u5f0f\u56c9\uff01\u8a18\u5f97\u6211\u5011\u6709\u8ddf GitHub \u9023\u52d5\uff0c\u7576\u4f60\u7684\u5c08\u6848 git push \u5f8c Heroku \u5c31\u6703\u5e6b\u4f60\u81ea\u52d5\u90e8\u7f72\u4e86\u3002\u4f60\u53ef\u4ee5\u5f9e Activity \u5167\u770b\u5230\u90e8\u7f72\u72c0\u614b\uff0c\u4e5f\u80fd\u5f9e\u53f3\u4e0a\u89d2 More -> View 
logs \u89c0\u770b\u5f8c\u53f0 Log \u8a0a\u606f\u3002\u6216\u8005\u4f60\u4e5f\u53ef\u4ee5\u5f9e Deploy \u5167\u624b\u52d5\u90e8\u7f72\u4e5f\u884c\u3002 Python Flask API \u7a0b\u5f0f\u64b0\u5beb \u2705 \u5c08\u6848\u5167\u5efa\u7acb Procfile \u2705 Heroku \u5efa\u7acb\u5c08\u6848 \u2705 Heroku \u8207 GitHub\u9023\u52d5 \u2705 \u90e8\u7f72\u5b8c\u6210\u5f8c\u4f60\u53ef\u4ee5\u5728 Settings \u5167\u7684 Domains \u770b\u5230\u4f60\u7684\u96f2\u7aef\u9023\u7d50\uff0c\u9019\u500b\u9023\u7d50\u9ede\u4e0b\u53bb\u5c31\u80fd\u770b\u5230\u6211\u5011\u7684API\u56c9\uff01 https://flask-api-example-with-ml-mode.herokuapp.com \u6e2c\u8a66 API \u6628\u5929\u5df2\u7d93\u8ddf\u5927\u5bb6\u4ecb\u7d39 Postman \u7684\u4f7f\u7528\u65b9\u5f0f\u3002\u4eca\u5929\u6211\u5011\u5c31\u4f86\u8a66\u8a66\u90e8\u7f72\u5728\u96f2\u7aef\u4f3a\u670d\u5668\u7684\u7d50\u679c\uff0c\u57fa\u672c\u4e0a\u6e2c\u8a66\u7684\u65b9\u5f0f\u8ddf\u6628\u5929\u5728\u672c\u6a5f\u6e2c\u8a66\u7684\u65b9\u6cd5\u4e00\u6a21\u4e00\u6a23\u3002\u6253\u958b Postman \u9ede\u9078 POST \u4e26\u8cbc\u4e0a API \u7db2\u5740 https://\u5c08\u6848\u540d\u7a31.herokuapp.com/predict \u3002\u4e26\u6a21\u64ec\u524d\u7aef\u4f7f\u7528\u8005\u767c\u9001\u6578\u503c Body -> raw -> JSON \u5c07\u82b1\u6735\u7684\u56db\u500b\u53c3\u6578\u4ee5 JSON \u683c\u5f0f\u50b3\u7d66\u5f8c\u7aef API\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 30] \u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#day-30-heroku-api","text":"","title":"[Day 30] \u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#_1","text":"\u52d5\u624b\u90e8\u7f72\u81ea\u5df1\u7684\u6a5f\u5668\u5b78\u7fd2 API \u4f7f\u7528 Heroku \u96f2\u7aef\u5e73\u53f0\u90e8\u7f72\u61c9\u7528\u7a0b\u5f0f 
\u7bc4\u4f8b\u7a0b\u5f0f\uff1a Code","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#_2","text":"\u958b\u767c\u7684\u6700\u5f8c\u4e00\u54e9\u8def \u90e8\u7f72\u61c9\u7528 \u3002\u90e8\u7f72 API \u5fc5\u9808\u5728\u4e00\u500b\u7a69\u5b9a\u7684\u4f3a\u670d\u5668\u4e0a\u904b\u884c\uff0c\u5927\u591a\u6578\u4f01\u696d\u53ef\u80fd\u6703\u79df\u7528\u96f2\u7aef\u7684\u865b\u64ec\u4f3a\u670d\u5668\u3002\u5e38\u898b\u7684\u96f2\u7aef\u5e73\u53f0\u4e09\u5de8\u982d\u6709 Google Cloud Platform (GCP)\u3001Amazon Web Service (AWS) \u4ee5\u53ca Microsoft Azure\u3002\u4ee5\u4e0a\u4e09\u5bb6\u4f9b\u61c9\u5546\u90fd\u6709\u63d0\u4f9b\u514d\u8cbb\u7684\u8a66\u7528\u984d\u5ea6\u4ee5\u53ca\u90e8\u7f72\u7684\u6559\u5b78\uff0c\u53e6\u5916\u96f2\u7aef\u4f3a\u670d\u5668\u8a08\u8cbb\u7684\u65b9\u5f0f\u662f\u63a1\u7528\u591a\u5c11\u4ed8\u591a\u5c11\u7684\u6982\u5ff5\u6536\u8cbb\u3002\u82e5\u6709 GCP \u4f7f\u7528\u9700\u6c42\u53ef\u4ee5\u53c3\u8003\u6211\u904e\u53bb\u6240\u9304\u88fd\u7684\u7cfb\u5217\u6559\u5b78\u5f71\u7247 GCP\u6559\u5b78-Python \u3002","title":"\u524d\u8a00"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#heroku","text":"Heroku \u662f\u4e00\u500b\u652f\u63f4\u591a\u7a2e\u7a0b\u5f0f\u8a9e\u8a00\u7684\u96f2\u5e73\u53f0\u5373\u670d\u52d9\u3002\u4e26\u4e14\u63d0\u4f9b\u4e00\u500b~~\u514d\u8cbb~~(\u73fe\u5728\u8981\u4ed8\u8cbb\u4e86)\u7684\u96f2\u7aef\u670d\u52d9\uff0c\u9019\u500b\u96f2\u7aef\u5e73\u53f0~~\u4e00\u500b\u5e33\u865f\u53ef\u4ee5\u514d\u8cbb\u5efa\u7acb\u4e94\u500b\u5c08\u6848~~\uff0c\u96d6\u7136\u662f~~\u514d\u8cbb\u7576\u7136\u4e5f\u6709\u4f7f\u7528\u4e0a\u7684\u9650\u5236\u3002\u4f8b\u5982\uff1a(1) \u8d85\u904e 30\u5206\u9418 \u9592\u7f6e\u5c07\u6703\u9032\u5165\u7761\u7720\u72c0\u614b\uff0c\u4e4b\u5f8c\u91cd\u65b0\u555f\u52d5 API \u6642\u6703\u9700\u8981\u7b49\u5f85\u4e00\u4e9b\u6642\u9593\u624d\u6709\u56de\u61c9\u3002(2) 500MB 
\u7684\u5132\u5b58\u7a7a\u9593\u9650\u5236\u3002\u7576\u7136 Heroku \u4e5f\u63d0\u4f9b\u591a\u7a2e\u8a9e\u8a00\u7684\u90e8\u7f72\u74b0\u5883\u50cf\u662f Ruby\u3001Node.js\u3001PHP\u3001Go\u3001Python ...\u7b49\u3002~~ \u672c\u7bc7\u6587\u7ae0\u6703\u6559\u4f60\u5982\u4f55\u90e8\u7f72 Python \u7684 Flask API\u3002","title":"Heroku \u96f2\u7aef\u5e73\u53f0"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#1","text":"","title":"1. \u524d\u7f6e\u4f5c\u696d"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#11","text":"\u9019\u4e00\u7bc7\u6587\u7ae0\u5c07\u4ee5\u82b1\u6735\u5206\u985e API \u70ba\u4f8b\uff0c\u62ff\u4e00\u500b\u5148\u5df2\u7d93\u8a13\u7df4\u597d\u7684\u6a21\u578b\u9032\u884c Python Flask API \u7684\u958b\u767c\u8207\u90e8\u7f72\u3002\u81f3\u65bc\u6a21\u578b\u7684\u8a13\u7df4\u548c Flask API \u7684\u8a73\u7d30\u5167\u5bb9\u9019\u908a\u5c31\u4e0d\u7d30\u63d0\uff0c\u82e5\u5404\u4f4d\u60f3\u4e86\u89e3\u7684\u53ef\u4ee5\u53c3\u8003\u6628\u5929\u7684\u5167\u5bb9 [Day 29] \u4f7f\u7528 Python Flask \u67b6\u8a2d API \u5427\uff01 \u3002\u53e6\u5916\u5efa\u8b70\u5927\u5bb6\u53ef\u4ee5\u53c3\u8003\u4e0b\u9762\u9019\u4efd\u7a0b\u5f0f\u78bc\u9032\u884c\u4eca\u5929\u7684\u5167\u5bb9\u5be6\u4f5c\uff0c\u4f7f\u7528 GitHub \u4e26\u5c07\u7a0b\u5f0f fork \u5230\u81ea\u5df1\u7684\u5e33\u865f\u4e2d\u3002 \u7bc4\u4f8b\u7a0b\u5f0f\u78bc \u4ee5\u4e0b\u7c21\u55ae\u8aaa\u660e\u5c08\u6848\u5167\u90e8\u7f72 Heroku \u7684\u91cd\u8981\u6a94\u6848\u3002","title":"1.1) \u7bc4\u4f8b\u7a0b\u5f0f\u78bc"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#12-procfile","text":"Procfile \u9019\u500b\u6a94\u6848\u662f\u8981\u544a\u8a34 Heroku \u8981\u5982\u4f55\u555f\u52d5\u9019\u500b web app\uff0c\u5728 Heroku \u88e1\uff0c\u57f7\u884c Python \u8981\u4f7f\u7528 Gunicorn \u4f86\u555f\u52d5 web server\u3002\u6240\u4ee5\u5728 requirements.txt \u88e1\uff0c\u8acb\u8a18\u5f97\u8981\u8f38\u5165 gunicorn\u3002Procfile 
\u6a94\u6848\uff0c\u7684\u5167\u5bb9\u5982\u4e0b\uff1a web gunicorn run:app","title":"1.2 Procfile \u8a2d\u5b9a\u6a94"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#2-heroku","text":"","title":"2. \u90e8\u7f72 Heroku \u5c08\u6848"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#21-heroku","text":"\u5efa\u7acb\u5e33\u865f\u5f8c\u53f3\u4e0a\u89d2\u300c New \u300d\u4e2d\u7684\u300c Create new app \u300d\u5efa\u7acb\u7b2c\u4e00\u500b\u61c9\u7528\u7a0b\u5f0f\uff1a","title":"2.1 \u5728 Heroku \u5efa\u7acb\u61c9\u7528\u7a0b\u5f0f"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#22-github","text":"\u9019\u4e00\u6b65\u9a5f\u662f\u5c07 GitHub \u4e0a\u7684\u5c08\u6848\u76f4\u63a5\u8207 Heroku \u505a\u9023\u52d5\uff0c\u4f60\u4e5f\u53ef\u4ee5\u76f4\u63a5 Fork \u9019\u500b\u5c08\u6848\u76f4\u63a5\u5be6\u4f5c\u3002\u6216\u662f\u4f60\u4e5f\u53ef\u4ee5\u900f\u904e Heroku CLI \u76f4\u63a5\u5c07\u672c\u6a5f\u7684\u7a0b\u5f0f\u78bc\u90e8\u7f72\u5230 Heroku \u4e3b\u6a5f\u4e2d\u3002\u90e8\u7f72\u968e\u6bb5\u883b\u5403\u5927\u5bb6 Git \u7248\u63a7\u7684\u80fd\u529b\uff0c\u57fa\u672c\u7684\u6559\u5b78\u9019\u88e1\u5c31\u4e0d\u8d05\u8ff0\uff0c\u60f3\u4e86\u89e3\u66f4\u591a Git \u6280\u5de7\u53ef\u4ee5 \u53c3\u8003 \u3002 \u9ede\u9078 Enable Automatic Deploys \u9023\u52d5\u5f8c\u53ef\u4ee5\u9078\u64c7\u81ea\u52d5\u90e8\u7f72\u3002\u7576\u4f60 GitHub \u5c08\u6848\u7684\u7a0b\u5f0f\u78bc\u6709\u66f4\u65b0\u6642\u4ed6\u6703\u81ea\u52d5\u5e6b\u4f60\u628a\u66f4\u65b0\u7684\u7a0b\u5f0f\u90e8\u7f72\u5230 Heroku \u4e2d\u3002 \u7531\u65bc\u81ea\u52d5\u66f4\u65b0\u8207\u90e8\u7f72\u6703\u6709\u4e0a\u9650\u6b21\u6578\uff0c\u7576\u4f60\u7684\u5c08\u6848\u5728 GitHub \u66f4\u65b0\u6b21\u6578\u592a\u983b\u7e41\u3002Heroku \u5c31\u6703\u505c\u6b62\u81ea\u52d5\u767c\u5e03\uff0c\u9019\u6642\u5019\u4f60\u4e5f\u53ef\u4ee5\u8a66\u8a66\u624b\u52d5\u90e8\u7f72\u3002","title":"2.2 \u5c08\u6848\u8207 GitHub 
\u9023\u52d5"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#_3","text":"\u78ba\u8a8d\u4ee5\u4e0b\u4e8b\u60c5\u90fd\u5b8c\u6210\u5f8c\u5c31\u53ef\u4ee5\u90e8\u7f72\u7a0b\u5f0f\u56c9\uff01\u8a18\u5f97\u6211\u5011\u6709\u8ddf GitHub \u9023\u52d5\uff0c\u7576\u4f60\u7684\u5c08\u6848 git push \u5f8c Heroku \u5c31\u6703\u5e6b\u4f60\u81ea\u52d5\u90e8\u7f72\u4e86\u3002\u4f60\u53ef\u4ee5\u5f9e Activity \u5167\u770b\u5230\u90e8\u7f72\u72c0\u614b\uff0c\u4e5f\u80fd\u5f9e\u53f3\u4e0a\u89d2 More -> View logs \u89c0\u770b\u5f8c\u53f0 Log \u8a0a\u606f\u3002\u6216\u8005\u4f60\u4e5f\u53ef\u4ee5\u5f9e Deploy \u5167\u624b\u52d5\u90e8\u7f72\u4e5f\u884c\u3002 Python Flask API \u7a0b\u5f0f\u64b0\u5beb \u2705 \u5c08\u6848\u5167\u5efa\u7acb Procfile \u2705 Heroku \u5efa\u7acb\u5c08\u6848 \u2705 Heroku \u8207 GitHub\u9023\u52d5 \u2705 \u90e8\u7f72\u5b8c\u6210\u5f8c\u4f60\u53ef\u4ee5\u5728 Settings \u5167\u7684 Domains \u770b\u5230\u4f60\u7684\u96f2\u7aef\u9023\u7d50\uff0c\u9019\u500b\u9023\u7d50\u9ede\u4e0b\u53bb\u5c31\u80fd\u770b\u5230\u6211\u5011\u7684API\u56c9\uff01 https://flask-api-example-with-ml-mode.herokuapp.com","title":"\u90e8\u7f72\u5c08\u6848"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#api","text":"\u6628\u5929\u5df2\u7d93\u8ddf\u5927\u5bb6\u4ecb\u7d39 Postman \u7684\u4f7f\u7528\u65b9\u5f0f\u3002\u4eca\u5929\u6211\u5011\u5c31\u4f86\u8a66\u8a66\u90e8\u7f72\u5728\u96f2\u7aef\u4f3a\u670d\u5668\u7684\u7d50\u679c\uff0c\u57fa\u672c\u4e0a\u6e2c\u8a66\u7684\u65b9\u5f0f\u8ddf\u6628\u5929\u5728\u672c\u6a5f\u6e2c\u8a66\u7684\u65b9\u6cd5\u4e00\u6a21\u4e00\u6a23\u3002\u6253\u958b Postman \u9ede\u9078 POST \u4e26\u8cbc\u4e0a API \u7db2\u5740 https://\u5c08\u6848\u540d\u7a31.herokuapp.com/predict \u3002\u4e26\u6a21\u64ec\u524d\u7aef\u4f7f\u7528\u8005\u767c\u9001\u6578\u503c Body -> raw -> JSON \u5c07\u82b1\u6735\u7684\u56db\u500b\u53c3\u6578\u4ee5 JSON \u683c\u5f0f\u50b3\u7d66\u5f8c\u7aef API\u3002 
\u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u6e2c\u8a66 API"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/","text":"[Day 4] \u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406 \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u8cc7\u6599\u5982\u4f55\u6e05\u7406 \u4ec0\u9ebc\u662f\u8cc7\u6599\u6e05\u7406\uff1f \u8cc7\u6599\u524d\u8655\u7406\u7684\u65b9\u5f0f \u70ba\u4ec0\u9ebc\u8cc7\u6599\u8981\u524d\u8655\u7406\u5462\uff1f\u524d\u8655\u88e1\u6709\u4f55\u597d\u8655\uff1f \u5b78\u7fd2 Sklearn \u4e2d\u56db\u7a2e\u4e0d\u540c\u8cc7\u6599\u524d\u8655\u7406\u65b9\u5f0f StandardScaler (\u5e73\u5747\u503c\u548c\u6a19\u6e96\u5dee) MinMaxScaler(\u6700\u5c0f\u6700\u5927\u503c\u6a19\u6e96\u5316) MaxAbsScaler\uff08\u7d55\u5c0d\u503c\u6700\u5927\u6a19\u6e96\u5316\uff09 RobustScaler \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 \u5f88\u591a\u6f14\u7b97\u6cd5\u5c0d\u6578\u64da\u7bc4\u570d\u975e\u5e38\u7684\u654f\u611f\u3002\u56e0\u6b64\u70ba\u4e86\u8981\u8b93\u6a21\u578b\u8a13\u7df4\u7684\u66f4\u5f37\u5927\uff0c\u901a\u5e38\u7684\u505a\u6cd5\u662f\u5c0d\u7279\u5fb5\u9032\u884c\u8abf\u7bc0\uff0c\u4f7f\u5f97\u6578\u64da\u66f4\u9069\u5408\u9019\u4e9b\u6f14\u7b97\u6cd5\u3002\u4e00\u822c\u4f86\u8aaa\uff0c\u6211\u5011\u5728\u505a\u6a5f\u5668\u5b78\u7fd2\u6642\u5f80\u5f80\u6703\u505a\u7279\u5fb5\u7684\u6b63\u898f\u5316\u3002 \u8f09\u5165\u76f8\u95dc\u5957\u4ef6 import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris np . set_printoptions ( suppress = True ) 1) \u8f09\u5165\u8cc7\u6599\u96c6 \u4eca\u5929\u7684\u7bc4\u4f8b\u6211\u5011\u5ef6\u7e8c\u6628\u5929\u7684\u4f8b\u5b50\uff0c\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u9032\u884c\u8cc7\u6599\u6b63\u898f\u5316\u7684\u793a\u7bc4\u3002 iris = load_iris () df_data = pd . DataFrame ( data = np . 
c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data 2) \u6aa2\u67e5\u7f3a\u5931\u503c \u4f7f\u7528 numpy \u6240\u63d0\u4f9b\u7684\u51fd\u5f0f\u4f86\u6aa2\u67e5\u662f\u5426\u6709 NA \u7f3a\u5931\u503c\uff0c\u5047\u8a2d\u6709\u7f3a\u5931\u503c\u4f7f\u7528 dropna() \u4f86\u79fb\u9664\u3002\u4f7f\u7528\u7684\u6642\u6a5f\u5728\u65bc\u7576\u53ea\u6709\u5c11\u91cf\u7684\u7f3a\u5931\u503c\u9069\u7528\uff0c\u82e5\u9047\u5230\u6709\u5927\u91cf\u7f3a\u5931\u503c\u7684\u60c5\u6cc1\uff0c\u6216\u662f\u672c\u8eab\u7684\u8cc7\u6599\u91cf\u5c31\u5f88\u5c11\u7684\u60c5\u6cc1\u4e0b\u5efa\u8b70\u53ef\u4ee5\u900f\u904e\u6a5f\u5668\u5b78\u7fd2\u7684\u65b9\u6cd5\u88dc\u503c\u4f86\u9810\u6e2c\u7f3a\u5931\u503c\u3002 X = df_data . drop ( labels = [ 'Species' ], axis = 1 ) . values # \u79fb\u9664Species\u4e26\u53d6\u5f97\u5269\u4e0b\u6b04\u4f4d\u8cc7\u6599 y = df_data [ 'Species' ] # checked missing data print ( \"checked missing data(NAN mount):\" , len ( np . where ( np . 
isnan ( X ))[ 0 ])) \u8f38\u51fa\u7d50\u679c\uff1a checked missing data(NAN mount): 0 \u7531\u65bc Sklearn \u6240\u63d0\u4f9b\u7684\u8cc7\u6599\u96c6\u975e\u5e38\u4e7e\u6de8\uff0c\u82e5\u4f60\u6536\u96c6\u5230\u7684\u8cc7\u6599\u6709\u8a31\u591a\u7684\u7f3a\u5931\u503c\u6216\u662f\u672c\u8eab\u8cc7\u6599\u91cf\u5c31\u4e0d\u591a\u7684\u5f37\u6cc1\u4e0b\uff0c\u5efa\u8b70\u597d\u597d\u7684\u53bb\u8655\u7406\u9019\u4e9b\u7f3a\u6f0f\u7684\u503c\u3002\u901a\u5e38\u88dc\u503c\u7684\u65b9\u6cd5\u53ef\u5206\u70ba\u624b\u52d5\u586b\u503c\u8207\u63d2\u503c\u6cd5\u3002\u9996\u5148\u624b\u52d5\u586b\u503c\u53ef\u4ee5\u4ee5\u8a72\u6b04\u4f4d\u6240\u6709\u8cc7\u6599\u7684\u7b97\u8853\u5e73\u5747\u6578\u6216\u4e2d\u4f4d\u6578\u505a\u586b\u88dc\u7684\u4f9d\u64da\u3002\u518d\u8005\u4f7f\u7528\u4ee5\u51fa\u73fe\u983b\u7387\u6700\u9ad8\u7684\u503c\u505a\u586b\u88dc\u4e5f\u662f\u5e38\u898b\u7684\u88dc\u503c\u65b9\u5f0f\u3002\u53e6\u4e00\u7a2e\u5dee\u503c\u6cd5\u662f\u900f\u904e\u6642\u9593\u6216\u7a7a\u9593\u4e0a\u7684\u6280\u5de7\u8655\u7406\u9019\u4e9b\u7f3a\u503c\uff0c\u4f8b\u5982\u7576\u8cc7\u6599\u662f\u6709\u6642\u9593\u5e8f\u5217\u7684\u56e0\u7d20\u5b58\u5728\u6642\uff0c\u53ef\u4ee5\u5229\u7528\u8a72\u7b46\u7f3a\u5931\u6b04\u4f4d\u9644\u8fd1\u7684\u6642\u9593\u9ede\u7684\u8cc7\u6599\u52a0\u7e3d\u4e26\u5e73\u5747\u3002 3) \u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6 \u6211\u5011\u900f\u904e Sklearn \u6240\u63d0\u4f9b\u7684 train_test_split() \u65b9\u6cd5\u4f86\u70ba\u6211\u5011\u7684\u8cc7\u6599\u9032\u884c\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u5207\u5272\u3002\u5728\u6b64\u65b9\u6cd5\u4e2d\u6211\u5011\u53ef\u4ee5\u8a2d\u5b9a\u4e00\u4e9b\u53c3\u6578\u4f86\u8b93\u6211\u5011\u5207\u5272\u7684\u8cc7\u6599\u66f4\u591a\u6a23\u6027\u3002\u5176\u4e2d test_size \u53c3\u6578\u5c31\u662f\u8a2d\u5b9a\u6e2c\u8a66\u96c6\u7684\u6bd4\u4f8b\uff0c\u7bc4\u4f8b\u4e2d\u6211\u5011\u8a2d\u5b9a 0.3 \u5373\u4ee3\u8868\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u6bd4\u4f8b\u70ba 
7:3\u3002\u53e6\u5916\u9810\u8a2d\u8cc7\u6599\u5207\u5272\u7684\u65b9\u5f0f\u662f\u96a8\u6a5f\u5207\u5272 shuffle=True \u5c0d\u539f\u59cb\u6578\u64da\u9032\u884c\u96a8\u6a5f\u62bd\u6a23\uff0c\u4ee5\u4fdd\u8b49\u96a8\u6a5f\u6027\u3002\u82e5\u60f3\u8981\u6bcf\u6b21\u7a0b\u5f0f\u57f7\u884c\u6642\u5207\u5272\u7d50\u679c\u90fd\u662f\u4e00\u6a23\u7684\u53ef\u4ee5\u8a2d\u5b9a\u4e82\u6578\u96a8\u6a5f\u7a2e\u5b50 random_state \u4e26\u7d66\u4e88\u4e00\u500b\u96a8\u6a5f\u6578\u503c\u3002\u6700\u5f8c\u4e00\u500b\u662f stratify \u5206\u5c64\u96a8\u6a5f\u62bd\u6a23\uff0c\u7279\u5225\u662f\u5728\u539f\u59cb\u6578\u64da\u4e2d\u6a23\u672c\u6a19\u7c64\u5206\u4f48\u4e0d\u5747\u8861\u6642\u975e\u5e38\u6709\u7528\u3002\u4f7f\u7528\u6642\u6a5f\u662f\u78ba\u4fdd\u5206\u985e\u554f\u984c y \u7684\u985e\u5225\u6578\u91cf\u5206\u4f48\u8981\u8207\u539f\u8cc7\u6599\u96c6\u4e00\u81f4\u3002\u4ee5\u514d\u8cc7\u6599\u96c6\u5207\u5272\u4e0d\u5e73\u5747\u5c0e\u81f4\u6a21\u578b\u8a13\u7df4\u6642\u6709\u5f88\u5927\u7684\u504f\u5dee\u3002 from sklearn.model_selection import train_test_split X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.3 , random_state = 42 , stratify = y ) print ( 'train shape:' , X_train . shape ) print ( 'test shape:' , X_test . shape ) \u8f38\u51fa\u7d50\u679c\uff1a train shape: (105, 4) test shape: (45, 4) Standardization\u5e73\u5747&\u8b8a\u7570\u6578\u6a19\u6e96\u5316 \u5c07\u6240\u6709\u7279\u5fb5\u6a19\u6e96\u5316\uff0c\u4e5f\u5c31\u662f\u9ad8\u65af\u5206\u4f48\u3002\u4f7f\u5f97\u6578\u64da\u7684\u5e73\u5747\u503c\u70ba 0\uff0c\u65b9\u5dee\u70ba 1\u3002\u9069\u5408\u7684\u4f7f\u7528\u6642\u6a5f\u65bc\u7576\u6709\u4e9b\u7279\u5fb5\u7684\u65b9\u5dee\u904e\u5927\u6642\uff0c\u4f7f\u7528\u6a19\u6e96\u5316\u80fd\u5920\u6709\u6548\u5730\u8b93\u6a21\u578b\u5feb\u901f\u6536\u6582\u3002 from sklearn.preprocessing import StandardScaler scaler = StandardScaler () X_train_scaled = scaler . 
fit_transform ( X_train ) # scaled\u4e4b\u5f8c\u7684\u8cc7\u6599\u96f6\u5747\u503c\uff0c\u55ae\u4f4d\u65b9\u5dee print ( '\u8cc7\u6599\u96c6 X \u7684\u5e73\u5747\u503c : ' , X_train . mean ( axis = 0 )) print ( '\u8cc7\u6599\u96c6 X \u7684\u6a19\u6e96\u5dee : ' , X_train . std ( axis = 0 )) print ( ' \\n StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u5e73\u5747\u503c : ' , X_train_scaled . mean ( axis = 0 )) print ( 'StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6a19\u6e96\u5dee : ' , X_train_scaled . std ( axis = 0 )) \u8f38\u51fa\u7d50\u679c\uff1a \u8cc7\u6599\u96c6 X \u7684\u5e73\u5747\u503c : [5.87333333 3.0552381 3.7847619 1.20571429] \u8cc7\u6599\u96c6 X \u7684\u6a19\u6e96\u5dee : [0.85882164 0.45502087 1.77553646 0.77383751] StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u5e73\u5747\u503c : [ 0. -0. -0. -0.] StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6 X \u7684\u6a19\u6e96\u5dee : [1. 1. 1. 1.] \u8a13\u7df4\u96c6\u7684 Scaler \u64ec\u5408\u5b8c\u6210\u5f8c\uff0c\u6211\u5011\u5c31\u80fd\u505a\u76f8\u540c\u7684\u8f49\u63db\u5728\u6e2c\u8a66\u96c6\u4e0a\u3002 X_test_scaled = scaler . transform ( X_test ) print ( ' \\n StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u5e73\u5747\u503c : ' , X_test_scaled . mean ( axis = 0 )) print ( 'StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6a19\u6e96\u5dee : ' , X_test_scaled . 
std ( axis = 0 )) \u8f38\u51fa\u7d50\u679c\uff1a StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u5e73\u5747\u503c : [0.40925926 0.44259259 0.44750958 0.45185185] StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6a19\u6e96\u5dee : [0.20457725 0.15915694 0.29647499 0.30224923] \u5982\u679c\u60f3\u5c07\u8f49\u63db\u5f8c\u7684\u8cc7\u6599\u9084\u539f\u53ef\u4ee5\u4f7f\u7528 inverse_transform() \u5c07\u6578\u503c\u9084\u539f\u6210\u539f\u672c\u7684\u8f38\u5165\u3002 # \u5c07\u7e2e\u653e\u7684\u8cc7\u6599\u9084\u539f X_test_inverse = scaler . inverse_transform ( X_test_scaled ) MinMaxScaler\u6700\u5c0f\u6700\u5927\u503c\u6a19\u6e96\u5316 \u5728 MinMaxScaler \u4e2d\u662f\u7d66\u5b9a\u4e86\u4e00\u500b\u660e\u78ba\u7684\u6700\u5927\u503c\u8207\u6700\u5c0f\u503c\u3002\u6bcf\u500b\u7279\u5fb5\u4e2d\u7684\u6700\u5c0f\u503c\u8b8a\u6210\u4e860\uff0c\u6700\u5927\u503c\u8b8a\u6210\u4e861\u3002\u6578\u64da\u6703\u7e2e\u653e\u5230\u5230[0,1]\u4e4b\u9593\u3002 from sklearn.preprocessing import MinMaxScaler scaler = MinMaxScaler () X_train_scaled = scaler . fit_transform ( X_train ) # scaled \u4e4b\u5f8c\u7684\u8cc7\u6599\u6700\u5c0f\u503c\u3001\u6700\u5927\u503c print ( '\u8cc7\u6599\u96c6 X \u7684\u6700\u5c0f\u503c : ' , X_train . min ( axis = 0 )) print ( '\u8cc7\u6599\u96c6 X \u7684\u6700\u5927\u503c : ' , X_train . max ( axis = 0 )) print ( ' \\n StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6700\u5c0f\u503c : ' , X_train_scaled . min ( axis = 0 )) print ( 'StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6700\u5927\u503c : ' , X_train_scaled . max ( axis = 0 )) \u8f38\u51fa\u7d50\u679c\uff1a \u8cc7\u6599\u96c6 X \u7684\u6700\u5c0f\u503c : [4.3 2. 1.1 0.1] \u8cc7\u6599\u96c6 X \u7684\u6700\u5927\u503c : [7.9 4.4 6.9 2.5] StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6700\u5c0f\u503c : [0. 0. 0. 0.] StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6700\u5927\u503c : [1. 1. 1. 1.] 
X_test_scaled = scaler . transform ( X_test ) print ( ' \\n StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6700\u5c0f\u503c : ' , X_test_scaled . min ( axis = 0 )) print ( 'StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6700\u5927\u503c : ' , X_test_scaled . max ( axis = 0 )) StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6700\u5c0f\u503c : [ 0.02777778 0.125 -0.01724138 0.04166667] StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6700\u5927\u503c : [0.83333333 0.83333333 0.89655172 0.95833333] MaxAbsScaler\u7d55\u5c0d\u503c\u6700\u5927\u6a19\u6e96\u5316 MaxAbsScaler \u8207 MinMaxScaler \u985e\u4f3c\uff0c\u6240\u6709\u6578\u64da\u90fd\u6703\u9664\u4ee5\u8a72\u5217\u7d55\u5c0d\u503c\u5f8c\u7684\u6700\u5927\u503c\u3002 \u6578\u64da\u6703\u7e2e\u653e\u5230\u5230[-1,1]\u4e4b\u9593\u3002 from sklearn.preprocessing import MaxAbsScaler scaler = MaxAbsScaler () . fit ( X ) X_scaled = scaler . transform ( X ) X_test_scaled = scaler . transform ( X_test ) RobustScaler \u53ef\u4ee5\u6709\u6548\u7684\u7e2e\u653e\u5e36\u6709 outlier \u7684\u6578\u64da\uff0c\u900f\u904e Robust \u5982\u679c\u6578\u64da\u4e2d\u542b\u6709\u7570\u5e38\u503c\u5728\u7e2e\u653e\u4e2d\u6703\u6368\u53bb\u3002 from sklearn.preprocessing import RobustScaler scaler = RobustScaler () . fit ( X ) X_scaled = scaler . 
transform ( X ) X_test_scaled = scaler.transform(X_test) \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 4] \u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#day-4","text":"","title":"[Day 4] \u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#_1","text":"\u8cc7\u6599\u5982\u4f55\u6e05\u7406 \u4ec0\u9ebc\u662f\u8cc7\u6599\u6e05\u7406\uff1f \u8cc7\u6599\u524d\u8655\u7406\u7684\u65b9\u5f0f \u70ba\u4ec0\u9ebc\u8cc7\u6599\u8981\u524d\u8655\u7406\u5462\uff1f\u524d\u8655\u88e1\u6709\u4f55\u597d\u8655\uff1f \u5b78\u7fd2 Sklearn \u4e2d\u56db\u7a2e\u4e0d\u540c\u8cc7\u6599\u524d\u8655\u7406\u65b9\u5f0f StandardScaler (\u5e73\u5747\u503c\u548c\u6a19\u6e96\u5dee) MinMaxScaler(\u6700\u5c0f\u6700\u5927\u503c\u6a19\u6e96\u5316) MaxAbsScaler\uff08\u7d55\u5c0d\u503c\u6700\u5927\u6a19\u6e96\u5316\uff09 RobustScaler \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#_2","text":"\u5f88\u591a\u6f14\u7b97\u6cd5\u5c0d\u6578\u64da\u7bc4\u570d\u975e\u5e38\u7684\u654f\u611f\u3002\u56e0\u6b64\u70ba\u4e86\u8981\u8b93\u6a21\u578b\u8a13\u7df4\u7684\u66f4\u5f37\u5927\uff0c\u901a\u5e38\u7684\u505a\u6cd5\u662f\u5c0d\u7279\u5fb5\u9032\u884c\u8abf\u7bc0\uff0c\u4f7f\u5f97\u6578\u64da\u66f4\u9069\u5408\u9019\u4e9b\u6f14\u7b97\u6cd5\u3002\u4e00\u822c\u4f86\u8aaa\uff0c\u6211\u5011\u5728\u505a\u6a5f\u5668\u5b78\u7fd2\u6642\u5f80\u5f80\u6703\u505a\u7279\u5fb5\u7684\u6b63\u898f\u5316\u3002","title":"\u524d\u8a00"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#_3","text":"import pandas as pd 
import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris np . set_printoptions ( suppress = True )","title":"\u8f09\u5165\u76f8\u95dc\u5957\u4ef6"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#1","text":"\u4eca\u5929\u7684\u7bc4\u4f8b\u6211\u5011\u5ef6\u7e8c\u6628\u5929\u7684\u4f8b\u5b50\uff0c\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u9032\u884c\u8cc7\u6599\u6b63\u898f\u5316\u7684\u793a\u7bc4\u3002 iris = load_iris () df_data = pd . DataFrame ( data = np . c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data","title":"1) \u8f09\u5165\u8cc7\u6599\u96c6"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#2","text":"\u4f7f\u7528 numpy \u6240\u63d0\u4f9b\u7684\u51fd\u5f0f\u4f86\u6aa2\u67e5\u662f\u5426\u6709 NA \u7f3a\u5931\u503c\uff0c\u5047\u8a2d\u6709\u7f3a\u5931\u503c\u4f7f\u7528 dropna() \u4f86\u79fb\u9664\u3002\u4f7f\u7528\u7684\u6642\u6a5f\u5728\u65bc\u7576\u53ea\u6709\u5c11\u91cf\u7684\u7f3a\u5931\u503c\u9069\u7528\uff0c\u82e5\u9047\u5230\u6709\u5927\u91cf\u7f3a\u5931\u503c\u7684\u60c5\u6cc1\uff0c\u6216\u662f\u672c\u8eab\u7684\u8cc7\u6599\u91cf\u5c31\u5f88\u5c11\u7684\u60c5\u6cc1\u4e0b\u5efa\u8b70\u53ef\u4ee5\u900f\u904e\u6a5f\u5668\u5b78\u7fd2\u7684\u65b9\u6cd5\u88dc\u503c\u4f86\u9810\u6e2c\u7f3a\u5931\u503c\u3002 X = df_data . drop ( labels = [ 'Species' ], axis = 1 ) . values # \u79fb\u9664Species\u4e26\u53d6\u5f97\u5269\u4e0b\u6b04\u4f4d\u8cc7\u6599 y = df_data [ 'Species' ] # checked missing data print ( \"checked missing data(NAN mount):\" , len ( np . where ( np . 
isnan ( X ))[ 0 ])) \u8f38\u51fa\u7d50\u679c\uff1a checked missing data(NAN mount): 0 \u7531\u65bc Sklearn \u6240\u63d0\u4f9b\u7684\u8cc7\u6599\u96c6\u975e\u5e38\u4e7e\u6de8\uff0c\u82e5\u4f60\u6536\u96c6\u5230\u7684\u8cc7\u6599\u6709\u8a31\u591a\u7684\u7f3a\u5931\u503c\u6216\u662f\u672c\u8eab\u8cc7\u6599\u91cf\u5c31\u4e0d\u591a\u7684\u5f37\u6cc1\u4e0b\uff0c\u5efa\u8b70\u597d\u597d\u7684\u53bb\u8655\u7406\u9019\u4e9b\u7f3a\u6f0f\u7684\u503c\u3002\u901a\u5e38\u88dc\u503c\u7684\u65b9\u6cd5\u53ef\u5206\u70ba\u624b\u52d5\u586b\u503c\u8207\u63d2\u503c\u6cd5\u3002\u9996\u5148\u624b\u52d5\u586b\u503c\u53ef\u4ee5\u4ee5\u8a72\u6b04\u4f4d\u6240\u6709\u8cc7\u6599\u7684\u7b97\u8853\u5e73\u5747\u6578\u6216\u4e2d\u4f4d\u6578\u505a\u586b\u88dc\u7684\u4f9d\u64da\u3002\u518d\u8005\u4f7f\u7528\u4ee5\u51fa\u73fe\u983b\u7387\u6700\u9ad8\u7684\u503c\u505a\u586b\u88dc\u4e5f\u662f\u5e38\u898b\u7684\u88dc\u503c\u65b9\u5f0f\u3002\u53e6\u4e00\u7a2e\u5dee\u503c\u6cd5\u662f\u900f\u904e\u6642\u9593\u6216\u7a7a\u9593\u4e0a\u7684\u6280\u5de7\u8655\u7406\u9019\u4e9b\u7f3a\u503c\uff0c\u4f8b\u5982\u7576\u8cc7\u6599\u662f\u6709\u6642\u9593\u5e8f\u5217\u7684\u56e0\u7d20\u5b58\u5728\u6642\uff0c\u53ef\u4ee5\u5229\u7528\u8a72\u7b46\u7f3a\u5931\u6b04\u4f4d\u9644\u8fd1\u7684\u6642\u9593\u9ede\u7684\u8cc7\u6599\u52a0\u7e3d\u4e26\u5e73\u5747\u3002","title":"2) \u6aa2\u67e5\u7f3a\u5931\u503c"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#3","text":"\u6211\u5011\u900f\u904e Sklearn \u6240\u63d0\u4f9b\u7684 train_test_split() \u65b9\u6cd5\u4f86\u70ba\u6211\u5011\u7684\u8cc7\u6599\u9032\u884c\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u5207\u5272\u3002\u5728\u6b64\u65b9\u6cd5\u4e2d\u6211\u5011\u53ef\u4ee5\u8a2d\u5b9a\u4e00\u4e9b\u53c3\u6578\u4f86\u8b93\u6211\u5011\u5207\u5272\u7684\u8cc7\u6599\u66f4\u591a\u6a23\u6027\u3002\u5176\u4e2d test_size 
\u53c3\u6578\u5c31\u662f\u8a2d\u5b9a\u6e2c\u8a66\u96c6\u7684\u6bd4\u4f8b\uff0c\u7bc4\u4f8b\u4e2d\u6211\u5011\u8a2d\u5b9a 0.3 \u5373\u4ee3\u8868\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u6bd4\u4f8b\u70ba 7:3\u3002\u53e6\u5916\u9810\u8a2d\u8cc7\u6599\u5207\u5272\u7684\u65b9\u5f0f\u662f\u96a8\u6a5f\u5207\u5272 shuffle=True \u5c0d\u539f\u59cb\u6578\u64da\u9032\u884c\u96a8\u6a5f\u62bd\u6a23\uff0c\u4ee5\u4fdd\u8b49\u96a8\u6a5f\u6027\u3002\u82e5\u60f3\u8981\u6bcf\u6b21\u7a0b\u5f0f\u57f7\u884c\u6642\u5207\u5272\u7d50\u679c\u90fd\u662f\u4e00\u6a23\u7684\u53ef\u4ee5\u8a2d\u5b9a\u4e82\u6578\u96a8\u6a5f\u7a2e\u5b50 random_state \u4e26\u7d66\u4e88\u4e00\u500b\u96a8\u6a5f\u6578\u503c\u3002\u6700\u5f8c\u4e00\u500b\u662f stratify \u5206\u5c64\u96a8\u6a5f\u62bd\u6a23\uff0c\u7279\u5225\u662f\u5728\u539f\u59cb\u6578\u64da\u4e2d\u6a23\u672c\u6a19\u7c64\u5206\u4f48\u4e0d\u5747\u8861\u6642\u975e\u5e38\u6709\u7528\u3002\u4f7f\u7528\u6642\u6a5f\u662f\u78ba\u4fdd\u5206\u985e\u554f\u984c y \u7684\u985e\u5225\u6578\u91cf\u5206\u4f48\u8981\u8207\u539f\u8cc7\u6599\u96c6\u4e00\u81f4\u3002\u4ee5\u514d\u8cc7\u6599\u96c6\u5207\u5272\u4e0d\u5e73\u5747\u5c0e\u81f4\u6a21\u578b\u8a13\u7df4\u6642\u6709\u5f88\u5927\u7684\u504f\u5dee\u3002 from sklearn.model_selection import train_test_split X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.3 , random_state = 42 , stratify = y ) print ( 'train shape:' , X_train . shape ) print ( 'test shape:' , X_test . 
shape ) \u8f38\u51fa\u7d50\u679c\uff1a train shape: (105, 4) test shape: (45, 4)","title":"3) \u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#standardization","text":"\u5c07\u6240\u6709\u7279\u5fb5\u6a19\u6e96\u5316\uff0c\u4e5f\u5c31\u662f\u9ad8\u65af\u5206\u4f48\u3002\u4f7f\u5f97\u6578\u64da\u7684\u5e73\u5747\u503c\u70ba 0\uff0c\u65b9\u5dee\u70ba 1\u3002\u9069\u5408\u7684\u4f7f\u7528\u6642\u6a5f\u65bc\u7576\u6709\u4e9b\u7279\u5fb5\u7684\u65b9\u5dee\u904e\u5927\u6642\uff0c\u4f7f\u7528\u6a19\u6e96\u5316\u80fd\u5920\u6709\u6548\u5730\u8b93\u6a21\u578b\u5feb\u901f\u6536\u6582\u3002 from sklearn.preprocessing import StandardScaler scaler = StandardScaler () X_train_scaled = scaler . fit_transform ( X_train ) # scaled\u4e4b\u5f8c\u7684\u8cc7\u6599\u96f6\u5747\u503c\uff0c\u55ae\u4f4d\u65b9\u5dee print ( '\u8cc7\u6599\u96c6 X \u7684\u5e73\u5747\u503c : ' , X_train . mean ( axis = 0 )) print ( '\u8cc7\u6599\u96c6 X \u7684\u6a19\u6e96\u5dee : ' , X_train . std ( axis = 0 )) print ( ' \\n StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u5e73\u5747\u503c : ' , X_train_scaled . mean ( axis = 0 )) print ( 'StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6a19\u6e96\u5dee : ' , X_train_scaled . std ( axis = 0 )) \u8f38\u51fa\u7d50\u679c\uff1a \u8cc7\u6599\u96c6 X \u7684\u5e73\u5747\u503c : [5.87333333 3.0552381 3.7847619 1.20571429] \u8cc7\u6599\u96c6 X \u7684\u6a19\u6e96\u5dee : [0.85882164 0.45502087 1.77553646 0.77383751] StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u5e73\u5747\u503c : [ 0. -0. -0. -0.] StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6 X \u7684\u6a19\u6e96\u5dee : [1. 1. 1. 1.] \u8a13\u7df4\u96c6\u7684 Scaler \u64ec\u5408\u5b8c\u6210\u5f8c\uff0c\u6211\u5011\u5c31\u80fd\u505a\u76f8\u540c\u7684\u8f49\u63db\u5728\u6e2c\u8a66\u96c6\u4e0a\u3002 X_test_scaled = scaler . 
transform ( X_test ) print ( ' \\n StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u5e73\u5747\u503c : ' , X_test_scaled . mean ( axis = 0 )) print ( 'StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6a19\u6e96\u5dee : ' , X_test_scaled . std ( axis = 0 )) \u8f38\u51fa\u7d50\u679c\uff1a StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u5e73\u5747\u503c : [0.40925926 0.44259259 0.44750958 0.45185185] StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6a19\u6e96\u5dee : [0.20457725 0.15915694 0.29647499 0.30224923] \u5982\u679c\u60f3\u5c07\u8f49\u63db\u5f8c\u7684\u8cc7\u6599\u9084\u539f\u53ef\u4ee5\u4f7f\u7528 inverse_transform() \u5c07\u6578\u503c\u9084\u539f\u6210\u539f\u672c\u7684\u8f38\u5165\u3002 # \u5c07\u7e2e\u653e\u7684\u8cc7\u6599\u9084\u539f X_test_inverse = scaler . inverse_transform ( X_test_scaled )","title":"Standardization\u5e73\u5747&\u8b8a\u7570\u6578\u6a19\u6e96\u5316"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#minmaxscaler","text":"\u5728 MinMaxScaler \u4e2d\u662f\u7d66\u5b9a\u4e86\u4e00\u500b\u660e\u78ba\u7684\u6700\u5927\u503c\u8207\u6700\u5c0f\u503c\u3002\u6bcf\u500b\u7279\u5fb5\u4e2d\u7684\u6700\u5c0f\u503c\u8b8a\u6210\u4e860\uff0c\u6700\u5927\u503c\u8b8a\u6210\u4e861\u3002\u6578\u64da\u6703\u7e2e\u653e\u5230\u5230[0,1]\u4e4b\u9593\u3002 from sklearn.preprocessing import MinMaxScaler scaler = MinMaxScaler () X_train_scaled = scaler . fit_transform ( X_train ) # scaled \u4e4b\u5f8c\u7684\u8cc7\u6599\u6700\u5c0f\u503c\u3001\u6700\u5927\u503c print ( '\u8cc7\u6599\u96c6 X \u7684\u6700\u5c0f\u503c : ' , X_train . min ( axis = 0 )) print ( '\u8cc7\u6599\u96c6 X \u7684\u6700\u5927\u503c : ' , X_train . max ( axis = 0 )) print ( ' \\n StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6700\u5c0f\u503c : ' , X_train_scaled . 
min ( axis = 0 )) print ( 'StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6700\u5927\u503c : ' , X_train_scaled . max ( axis = 0 )) \u8f38\u51fa\u7d50\u679c\uff1a \u8cc7\u6599\u96c6 X \u7684\u6700\u5c0f\u503c : [4.3 2. 1.1 0.1] \u8cc7\u6599\u96c6 X \u7684\u6700\u5927\u503c : [7.9 4.4 6.9 2.5] StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6700\u5c0f\u503c : [0. 0. 0. 0.] StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6700\u5927\u503c : [1. 1. 1. 1.] X_test_scaled = scaler . transform ( X_test ) print ( ' \\n StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6700\u5c0f\u503c : ' , X_test_scaled . min ( axis = 0 )) print ( 'StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6700\u5927\u503c : ' , X_test_scaled . max ( axis = 0 )) StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6700\u5c0f\u503c : [ 0.02777778 0.125 -0.01724138 0.04166667] StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6700\u5927\u503c : [0.83333333 0.83333333 0.89655172 0.95833333]","title":"MinMaxScaler\u6700\u5c0f\u6700\u5927\u503c\u6a19\u6e96\u5316"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#maxabsscaler","text":"MaxAbsScaler \u8207 MinMaxScaler \u985e\u4f3c\uff0c\u6240\u6709\u6578\u64da\u90fd\u6703\u9664\u4ee5\u8a72\u5217\u7d55\u5c0d\u503c\u5f8c\u7684\u6700\u5927\u503c\u3002 \u6578\u64da\u6703\u7e2e\u653e\u5230\u5230[-1,1]\u4e4b\u9593\u3002 from sklearn.preprocessing import MaxAbsScaler scaler = MaxAbsScaler () . fit ( X ) X_scaled = scaler . transform ( X ) X_test_scaled = scaler . 
transform ( X_test )","title":"MaxAbsScaler\u7d55\u5c0d\u503c\u6700\u5927\u6a19\u6e96\u5316"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#robustscaler","text":"\u53ef\u4ee5\u6709\u6548\u7684\u7e2e\u653e\u5e36\u6709 outlier \u7684\u6578\u64da\uff0c\u900f\u904e Robust \u5982\u679c\u6578\u64da\u4e2d\u542b\u6709\u7570\u5e38\u503c\u5728\u7e2e\u653e\u4e2d\u6703\u6368\u53bb\u3002 from sklearn.preprocessing import RobustScaler scaler = RobustScaler () . fit ( X ) X_scaled = scaler . transform ( X ) X_test_scaled = scaler.transform(X_test) \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"RobustScaler"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/","text":"[Day 5] \u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16 \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u4e86\u89e3\u6a5f\u5668\u5b78\u7fd2\u662f\u4ec0\u9ebc \u4f55\u8b02\u6a5f\u5668\u5b78\u7fd2? \u4eba\u5de5\u667a\u6167\u7684\u7bc4\u7587 \u4ec0\u9ebc\u662f\u4eba\u5de5\u667a\u6167? \u8cc7\u6599\u79d1\u5b78\u4e09\u528d\u5ba2 \u6a5f\u5668\u5b78\u7fd2\u7684\u7a2e\u985e\u6709\u54ea\u4e9b\uff1f \u5f9e\u4eba\u985e\u5b78\u7fd2\u5230\u6a5f\u5668\u5b78\u7fd2 \u8a8d\u8b58\u4ec0\u9ebc\u662f\u8cc7\u6599 \u6a5f\u5668\u5b78\u7fd2\u7684\u6d41\u7a0b \u4f55\u8b02\u6a5f\u5668\u5b78\u7fd2? 
\u6a5f\u5668\u5b78\u7fd2\u662f\u4e00\u7a2e\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\uff0c\u662f\u4e00\u7a2e\u5f9e\u4e00\u5927\u7fa4\u8cc7\u6599\u4e2d\u53bb\u5b78\u7fd2\u627e\u51fa\u89e3\u6c7a\u554f\u984c\u7684\u65b9\u6cd5\u3002\u7c21\u55ae\u4f86\u8aaa\u4f60\u53ea\u8981\u5c07\u5927\u91cf\u7684\u8cc7\u6599\u9935\u7d66\u96fb\u8166\uff0c\u6a5f\u5668\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\u6703\u70ba\u4f60\u91cf\u8eab\u6253\u9020\u5b78\u7fd2\u51fa\u4e00\u500b\u7279\u5b9a\u7684\u6a21\u578b\u7d66\u4f60\uff0c\u800c\u4e0d\u662f\u518d\u900f\u904e\u4eba\u985e\u624b\u52d5\u7684\u7d66\u4e88\u898f\u5247\u3002\u900f\u904e\u4e00\u5806\u8cc7\u6599\u6709\u6a19\u7c64\u7d66\u7b54\u6848\uff0c\u4e26\u5f9e\u8cc7\u6599\u96c6\u5b78\u7fd2\u8207\u6a19\u8a18\u9593\u7684\u95dc\u806f\uff0c\u6700\u5f8c\u518d\u5f9e\u975e\u7279\u5b9a\u8cc7\u6599\u53bb\u8fa8\u8a8d\u7b54\u6848\u3002 \u4eba\u5de5\u667a\u6167\u7684\u7bc4\u7587 \u5176\u5be6\u4eba\u5de5\u667a\u6167\u7684\u61c9\u7528\u5728\u73fe\u5be6\u751f\u6d3b\u4e2d\u96a8\u8655\u53ef\u898b\uff0c\u5f9e\u88fd\u9020\u3001\u91ab\u7642\u3001\u91d1\u878d\u3001\u4ea4\u901a\u3001\u5b89\u9632\u3001 \u96f6\u552e\u3001\u7269\u6d41\u3001\u8fb2\u696d......\u7b49\u90fd\u53ef\u4ee5\u770b\u5230\u8207 AI \u7684\u76f8\u95dc\u61c9\u7528\u3002\u7576\u7136\u4eba\u5de5\u667a\u6167\u7684\u51fa\u73fe\u4e26\u4e0d\u662f\u66c7\u82b1\u4e00\u73fe\uff0cArtificial Intelligence \u9019\u4e00\u8a5e\u5176\u5be6\u65e9\u5728 20 \u4e16\u7d00\u4e2d\u5c31\u88ab\u63d0\u51fa\uff0c\u8d77\u521d\u7576\u7136\u4e0d\u88ab\u770b\u597d\u751a\u81f3\u5927\u5bb6\u90fd\u89ba\u5f97\u8981\u4e00\u500b\u6a5f\u5668\u4eba\u5b78\u6703\u4eba\u985e\u7684\u667a\u6167\u662f\u5929\u65b9\u591c\u8b5a\u7684\u4e8b\u60c5\uff0c\u4e2d\u9593\u4e5f\u7d93\u6b77\u597d\u5e7e\u6b21 AI \u5bd2\u51ac\uff0c\u73fe\u5728\u56de\u904e\u982d\u4f86\u770b AI 
\u7684\u7814\u7a76\u9818\u57df\u8d77\u4f0f\u4f0f\u3002\u4e0d\u904e\u96a8\u8457\u8edf\u786c\u9ad4\u7684\u9032\u6b65\uff0c\u9010\u6f38\u4f7f\u5f97\u9700\u8981\u5927\u91cf\u8a08\u7b97\u7684\u4eba\u5de5\u667a\u6167\u6280\u8853\u6162\u6162\u7684\u88ab\u6316\u6398\u51fa\u4f86\u3002\u8fd1\u5e74\u4f86 AI \u65b0\u5275\u5982\u8207\u6625\u7b4d\u822c\u5192\u51fa\uff0c\u667a\u6167\u6a5f\u5668\u4eba\u3001\u611f\u77e5\u8b58\u5225\u3001\u81ea\u7136\u8a9e\u8a00\u8655\u7406\u3001\u5c0d\u8a71\u5ba2\u670d\u3001\u81ea\u52d5\u99d5\u99db\u3001\u7455\u75b5\u6aa2\u6e2c\u3001\u9810\u9632\u6027\u7dad\u4fee\u3001\u81ea\u52d5\u6d41\u7a0b\u63a7\u5236\u3001\u539f\u6599\u7d44\u5408\u6700\u4f73\u5316......\u7b49\u3002 \u4ec0\u9ebc\u662f\u4eba\u5de5\u667a\u6167? \u5176\u5be6\u4eba\u5de5\u667a\u6167\u9019\u9805\u9818\u57df\u53c8\u5206\u6210\u5f88\u591a\u9580\u6d3e\uff0c\u5f9e\u6700\u65e9\u7684\u7b26\u865f\u908f\u8f2f\u3001\u5c08\u5bb6\u7cfb\u7d71\u958b\u59cb\u8aaa\u8d77\u3002\u65e9\u671f\u7684 AI \u662f\u5c07\u4eba\u985e\u7684\u5c08\u5bb6\u77e5\u8b58\u900f\u904e\u77e5\u8b58\u5eab\u8207\u898f\u5247\u5eab\u653e\u5230\u6a5f\u5668\u4eba\u7684\u5927\u8166\u4e2d\uff0c\u4e26\u8ce6\u4e88\u6a5f\u5668\u4eba\u667a\u6167\u4f7f\u5f97\u6709\u80fd\u529b\u5224\u65b7\u4e8b\u7269\u3002\u7576\u7136\u4eba\u985e\u5c08\u5bb6\u7684\u77e5\u8b58\u59cb\u7d42\u6709\u9650\uff0c\u96a8\u8457\u7db2\u8def\u8207\u500b\u4eba\u96fb\u8166\u666e\u53ca\u4e26\u9032\u5165\u4e86\u5927\u6578\u64da\u6642\u4ee3\u3002\u5404\u500b\u79d1\u5b78\u5bb6\u65bc\u662f\u958b\u59cb\u601d\u8003\u5982\u4f55\u5c07\u9019\u4e9b\u641c\u96c6\u4f86\u7684\u5927\u91cf\u6578\u64da\u9032\u884c\u61c9\u7528\u8207\u5206\u6790\uff1f\u6a5f\u5668\u5b78\u7fd2\u4e00\u8a5e\u5c31\u51fa\u73fe\u4e86\uff0c\u76ee\u6a19\u662f\u900f\u904e\u73fe\u5be6\u751f\u6d3b\u4e2d\u6240\u6536\u96c6\u7684\u8cc7\u6599\uff0c\u642d\u914d\u5404\u7a2e\u4e0d\u540c\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u8a13\u7df4\u51fa\u4f86\u4e00\u500b\u6a21\u578b\uff0c\u4f7f\u5f97\u6a5f\u5668\u4eba\u6709\u5224\u6
5b7\u8207\u9810\u6e2c\u7684\u80fd\u529b\u3002\u7576\u7136\u8fd1\u5e7e\u5e74\u71b1\u9580\u7684\u6df1\u5ea6\u5b78\u7fd2\u5176\u5be6\u50c5\u662f\u500b\u6a5f\u5668\u5b78\u7fd2\u88e1\u9762\u7684\u5176\u4e2d\u4e00\u7a2e\u5b78\u7fd2\u7684\u65b9\u6cd5\uff0c\u4ed6\u662f\u6a21\u4eff\u4eba\u985e\u7684\u795e\u7d93\u7cfb\u7d71\uff0c\u900f\u904e\u5927\u91cf\u7684\u795e\u7d93\u5143\u8207\u591a\u5c64\u7684\u795e\u7d93\u7db2\u8def\u5efa\u69cb\u51fa\u4f86\u7684\u8907\u96dc\u6578\u5b78\u6a21\u578b\u3002\u7136\u800c\u5728\u672c\u7cfb\u5217\u6559\u5b78\u4e2d\u6211\u5011\u6703\u5f9e\u6700\u57fa\u790e\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u958b\u59cb\u63d0\u8d77\uff0c\u4e26\u4e00\u6b65\u4e00\u6b65\u7684\u5e36\u9818\u8b80\u8005\u6210\u70ba\u4e00\u4f4d\u771f\u6b63\u7684\u8cc7\u6599\u79d1\u5b78\u5bb6\u3002 \u8cc7\u6599\u79d1\u5b78 \u2715 \u4e09\u528d\u5ba2 \u8cc7\u6599\u79d1\u5b78\u4e3b\u8981\u900f\u904e\u6a5f\u5668\u5b78\u7fd2\u7684\u6280\u8853\uff0c\u8b93\u6a5f\u5668\u53ef\u4ee5\u9810\u6e2c\u6216\u8005\u63a8\u8ad6\u3002\u5176\u4e2d\u9019\u5e7e\u5e74\u5f88\u592f\u7684\u8cc7\u6599\u79d1\u5b78\u5bb6\u9019\u4e00\u540d\u8a5e\u5176\u5be6\u662f\u7531\u4e09\u7a2e\u4eba\u6240\u7d44\u5408\u8d77\u4f86\u7684\u3002\u7b2c\u4e00\u500b\u662f\u6578\u5b78\u8207\u7d71\u8a08\u80cc\u666f\u7684\u4eba\uff0c\u4ed6\u5011\u80fd\u5920\u900f\u904e\u5c0d\u8cc7\u6599\u7684\u654f\u611f\u5ea6\u5f9e\u4e00\u5927\u7fa4\u539f\u59cb\u8cc7\u6599\u4e2d\u63a2\u7d22\u6709\u610f\u7fa9\u7684\u8cc7\u8a0a\u3002\u4e26\u8a2d\u8a08\u4e00\u5957\u9069\u5408\u7684\u6a21\u578b\u70ba\u9019\u4e00\u7fa4\u8cc7\u6599\u9032\u884c\u6578\u64da\u64ec\u5408\u3002\u7b2c\u4e8c\u7a2e\u4eba\u662f\u96fb\u8166\u79d1\u5b78\u80cc\u666f\u7684\u5de5\u7a0b\u5e2b\uff0c\u4ed6\u5011\u64c5\u9577\u7a0b\u5f0f\u8a9e\u8a00\u80fd\u5920\u5c07\u8907\u96dc\u7684\u6578\u5b78\u6a21\u578b\u5920\u904e\u7a0b\u5f0f\u5be6\u4f5c\u4e26\u4e14\u5354\u52a9\u843d\u5730\u6574\u5408\u3002\u7576\u7136\u73fe\u4eca\u6709\u975e\u5e38\u591a\u6a5f\u5668\u5b78\u7fd2\u7684\u5957\u4ef6\
u4f8b\u5982 Sklearn\u3001TensorFlow......\u7b49\uff0c\u964d\u4f4e\u4e86\u5927\u5bb6\u5b78\u7fd2\u7684\u9580\u6abb\uff0c\u4e0d\u4e00\u5b9a\u662f\u8981\u7406\u5de5\u80cc\u666f\u7684\u4eba\u90fd\u53ef\u4ee5\u900f\u904e\u9019\u4e9b\u6a5f\u5668\u5b78\u7fd2\u5957\u4ef6\u4e00\u7aba\u4eba\u5de5\u667a\u6167\u7684\u5967\u79d8\u3002\u9664\u6b64\u4e4b\u5916 MLOps \u662f\u8fd1\u5e74\u4f86\u5ef6\u4f38\u51fa\u4f86\u7684\u65b0\u540d\u8a5e\uff0c\u5176\u5be6\u6982\u5ff5\u8207 DevOps \u985e\u4f3c\u4e26\u5c07\u9019\u4e00\u5957\u6a5f\u5236\u8907\u88fd\u5728\u6a5f\u5668\u5b78\u7fd2\u5c08\u6848\u4e0a\uff0c\u6211\u5011\u5e73\u6642\u6240\u57f7\u884c\u7684 AI \u5c08\u6848\u5fc5\u9808\u900f\u904e\u6301\u7e8c\u6027\u6574\u5408\u8207\u7dad\u904b\u7684\u89c0\u5ff5\u4e0d\u65b7\u7684\u9031\u671f\u6027\u66f4\u65b0\u5f9e\u6700\u65b0\u6536\u96c6\u5230\u5f97\u6578\u64da\u91cd\u65b0\u5b78\u7fd2\u6a21\u578b\u8d8a\u4f86\u8d8a\u8cbc\u8fd1\u4f7f\u7528\u8005\u3002\u6700\u5f8c\u4e00\u500b\u95dc\u9375\u7684\u4eba\u7269\u5c31\u662f\u5404\u884c\u5404\u696d\u7684\u9818\u57df\u5c08\u5bb6\uff0c\u56e0\u70ba AI \u518d\u4e5f\u4e0d\u662f\u8cc7\u8a0a\u80cc\u666f\u4eba\u7684\u5c08\u5229\u3002\u6211\u5011\u53ef\u4ee5\u5920\u904e AI \u89e3\u6c7a\u65e5\u5e38\u751f\u6d3b\u4e2d\u7684\u554f\u984c\uff0c\u56e0\u6b64\u6211\u5011\u5fc5\u9808\u8207\u9818\u57df\u5c08\u5bb6\u9032\u884c\u5408\u4f5c\u5354\u52a9\u8cc7\u6599\u6e05\u8207\u8207\u5efa\u7acb\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u3002\u7e3d\u4e4b\u8981\u6210\u70ba\u4e00\u500b\u597d\u7684\u8cc7\u79d1\u5b78\u5bb6\u4e0a\u8ff0\u4e09\u7a2e\u4eba\u7684\u7279\u6027\u7f3a\u4e00\u4e0d\u53ef\u3002 \u6a5f\u5668\u5b78\u7fd2\u7a2e\u985e \u6a5f\u5668\u5b78\u7fd2\u662f\u4e00\u7a2e\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\uff0c\u662f\u4e00\u7a2e\u5f9e\u8cc7\u6599\u4e2d\u53bb\u5b78\u7fd2\u4e26\u627e\u51fa\u89e3\u6c7a\u65b9\u6cd5\u3002\u5176\u4f9d\u7167\u6a5f\u5668\u5b78\u7fd2\u7684\u7a2e\u985e\u5927\u81f4\u53ef\u4ee5\u5206\u6210\u4ee5\u4e0b\u5e7e\u985e\uff1a \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2 
\u7121\u9700\u6a19\u7c64/\u7b54\u6848\u5373\u53ef\u5b78\u7fd2 Ex: \u96c6\u7fa4 (Clustering) \u76e3\u7763\u5f0f\u5b78\u7fd2 \u9700\u8981\u6a19\u7c64/\u7b54\u6848\u624d\u80fd\u5b78\u7fd2 Ex: \u5206\u985e (Classification) \u3001 \u56de\u6b78 (Regression) \u534a\u76e3\u7763\u5f0f\u5b78\u7fd2 \u81ea\u76e3\u7763\u5b78\u7fd2 \u5f37\u5316\u5b78\u7fd2 \u5982\u4f55\u64f7\u53d6\u597d\u7684\u7279\u5fb5\u662f\u5728\u6a5f\u5668\u5b78\u7fd2\u4e2d\u5f88\u91cd\u8981\u7684\u4e00\u4ef6\u4e8b \u5f9e\u4eba\u985e\u5b78\u7fd2\u5230\u6a5f\u5668\u5b78\u7fd2 \u7c21\u55ae\u4f86\u8aaa\u6a5f\u5668\u5b78\u7fd2\u5c31\u662f\u8981\u5f9e\u4e00\u5927\u7fa4\u8cc7\u6599\u7576\u4e2d\u627e\u51fa\u4e00\u500b\u6578\u5b78\u6a21\u578b\u3002\u9019\u500b\u6578\u5b78\u6a21\u578b\u53ef\u4ee5\u7a31\u4f5c\u662f\u4e00\u500b f(x)=y \u5176\u4e2d x \u70ba\u8f38\u5165\u7684\u8cc7\u6599\uff0cy \u70ba\u8a72\u7b46\u8cc7\u6599\u6240\u76f8\u5c0d\u61c9\u7684\u8f38\u51fa\u3002\u5176\u4e2d f \u5373\u70ba\u51fd\u6578\uff0c\u4e5f\u5c31\u662f\u4efb\u4e00\u7a2e\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u3002\u81f3\u65bc\u5178\u578b\u7684\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u6709\u54ea\u4e9b\u5462\uff1f\u4f8b\u5982\u7dda\u6027\u8ff4\u6b78\u3001\u908f\u8f2f\u56de\u6b78\u3001KNN\u3001SVM\u3001\u6c7a\u7b56\u6a39\u3001\u96a8\u6a5f\u68ee\u6797\u3001XGBoost......\u7b49\u3002\u4e4b\u5f8c\u7684\u7cfb\u5217\u6587\u7ae0\u90fd\u6703\u4f9d\u5e8f\u5411\u5404\u4f4d\u89e3\u91cb\u3002 \u4ec0\u9ebc\u662f\u8cc7\u6599? 
\u4e00\u822c\u4f86\u8aaa\u8cc7\u6599\u53ef\u4ee5\u5206\u6210\u5169\u500b\u90e8\u5206\u3002\u4ee5\u4e00\u500b\u5206\u985e\u7684\u554f\u984c\u4f86\u8aaa\uff0c\u5206\u5225\u6709\u8f38\u5165\u7684\u7279\u5fb5\u4ee5\u53ca\u8a72\u7b46\u8cc7\u6599\u76f8\u5c0d\u61c9\u7684\u7b54\u6848\u7a31\u4f5c\u6a19\u8a18\u3002AI \u9019\u500b\u9818\u57df\u5c31\u662f\u8b93\u6a5f\u5668\u6709\u5b78\u7fd2\u89e3\u6c7a\u554f\u984c\u7684\u80fd\u529b\uff0c\u800c\u4e0d\u662f\u6211\u5011\u544a\u8a34\u4ed6\u61c9\u8a72\u600e\u9ebc\u89e3\u6c7a\u554f\u984c\u3002\u6211\u5011\u8209\u4e00\u500b\u7c21\u55ae\u7684\u4f8b\u5b50\uff0c\u5047\u8a2d\u6211\u5011\u9700\u8981\u9810\u6e2c\u660e\u5929\u662f\u5426\u6703\u4e0b\u96e8\u3002\u6211\u5011\u7684\u8f38\u5165\u7279\u5fb5\u5c31\u53ef\u4ee5\u6709\u5404\u500b\u89c0\u6e2c\u7ad9\u7684\u96f2\u91cf\u8207\u6eab\u6fd5\u5ea6\u4f5c\u70ba\u6a21\u578b\u8a13\u7df4\u7684\u8cc7\u6599\u3002\u800c\u6bcf\u4e00\u7b46\u7684\u5929\u6c23\u8cc7\u8a0a\u90fd\u5c0d\u61c9\u8457\u662f\u5426\u6703\u4e0b\u96e8\u7684\u6a19\u6e96\u7b54\u6848\u3002 \u7279\u5fb5 (Feature): \u7528\u4f86\u63cf\u8ff0\u6bcf\u4e00\u7b46\u8cc7\u6599\uff0c\u901a\u5e38\u6703\u7528 X \u4f86\u8868\u793a \u6a19\u8a18 (Label): \u7528\u4f86\u8868\u793a\u6bcf\u4e00\u7b46\u8cc7\u6599\u6240\u5c0d\u61c9\u7684\u8f38\u51fa\uff0c\u9019\u500b\u8f38\u51fa\u6a23\u5f0f\u53ef\u4ee5\u6709\u4e0d\u540c\u7684\u72c0\u614b(\u53ef\u80fd\u662f\u985e\u5225\u6216\u8005\u5be6\u6578\u503c\u7b49)\uff0c\u901a\u5e38\u6703\u7528 Y \u4f86\u8868\u793a\u3002 \u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b 
\u5b8c\u6574\u7684\u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b\u5927\u81f4\u5206\u6210\u516b\u500b\u6b65\u9a5f\u3002\u9996\u5148\u5b9a\u7fa9\u554f\u984c\uff0c\u7d93\u904e\u9700\u6c42\u8a0e\u8ad6\u8207\u8a55\u4f30\u5f8c\u6709\u500b\u660e\u78ba\u7684\u76ee\u6a19\u4e26\u958b\u59cb\u57f7\u884c\u5c08\u6848\u3002\u63a5\u8457\u958b\u59cb\u641c\u96c6\u8cc7\u6599\uff0c\u7531\u65bc\u5404\u5834\u57df\u6240\u6536\u96c6\u5230\u7684\u539f\u59cb\u6578\u64da\u53ef\u80fd\u5c1a\u672a\u6574\u7406\u4ee5\u53ca\u683c\u5f0f\u5c1a\u672a\u7d71\u4e00\u3002\u56e0\u6b64\u7b2c\u4e09\u6b65\u7684\u8cc7\u6599\u6e05\u7406\u6975\u70ba\u91cd\u8981\uff0c\u6709\u500b\u4e7e\u6de8\u7684\u8cc7\u6599\u53ef\u4ee5\u5c0d\u6a21\u578b\u8868\u73fe\u6709\u5927\u5e45\u7684\u63d0\u5347\u3002\u8cc7\u6599\u4e00\u5207\u5c31\u7dd2\u5f8c\u5efa\u8b70\u5728\u5efa\u6a21\u4e4b\u524d\u5148\u5c0d\u8cc7\u6599\u9032\u884c\u8996\u89ba\u5316\u5206\u6790\uff0c\u4e26\u70ba\u6578\u64da\u505a\u524d\u8655\u7406\u4ee5\u53ca\u5c08\u696d\u77e5\u8b58\u7684\u7279\u5fb5\u5de5\u7a0b\u3002\u5c0d\u8cc7\u6599\u6709\u521d\u6b65\u7684\u8a8d\u8b58\u5f8c\uff0c\u63a5\u8457\u6311\u9078\u5408\u9069\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u8a13\u7df4\u8207\u8a55\u4f30\u6a21\u578b\u3002\u5728\u6a21\u578b\u6b63\u5f0f\u4e0a\u7dda\u4e4b\u524d\uff0c\u5148\u900f\u904e\u6e2c\u8a66\u96c6\u6216\u662f\u4ea4\u53c9\u9a57\u8b49\u7b49\u6a5f\u5236\u78ba\u8a8d\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u3002\u6a21\u578b\u78ba\u8a8d\u6c92\u6709\u554f\u984c\u5f8c\u5373\u53ef\u5c07\u6a21\u578b\u6253\u5305\u8f38\u51fa\uff0c\u4e26\u4e14\u8207\u5be6\u969b\u5834\u57df\u61c9\u7528\u9032\u884c\u6574\u5408\u3002\u6700\u7d42\u5c31\u662f\u90e8\u7f72\u6a21\u578b\u4ee5\u53ca\u7dad\u904b\uff0c\u6301\u7e8c\u5c07\u5834\u57df\u8490\u96c6\u5230\u7684\u65b0\u8cc7\u6599\u9032\u884c\u518d\u8a13\u7df4\uff0c\u5f62\u6210\u4e00\u500b\u958b\u767c\u5faa\u74b0\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub 
\u53d6\u5f97\uff01","title":"[Day 5] \u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#day-5","text":"","title":"[Day 5] \u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#_1","text":"\u4e86\u89e3\u6a5f\u5668\u5b78\u7fd2\u662f\u4ec0\u9ebc \u4f55\u8b02\u6a5f\u5668\u5b78\u7fd2? \u4eba\u5de5\u667a\u6167\u7684\u7bc4\u7587 \u4ec0\u9ebc\u662f\u4eba\u5de5\u667a\u6167? \u8cc7\u6599\u79d1\u5b78\u4e09\u528d\u5ba2 \u6a5f\u5668\u5b78\u7fd2\u7684\u7a2e\u985e\u6709\u54ea\u4e9b\uff1f \u5f9e\u4eba\u985e\u5b78\u7fd2\u5230\u6a5f\u5668\u5b78\u7fd2 \u8a8d\u8b58\u4ec0\u9ebc\u662f\u8cc7\u6599 \u6a5f\u5668\u5b78\u7fd2\u7684\u6d41\u7a0b","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#_2","text":"\u6a5f\u5668\u5b78\u7fd2\u662f\u4e00\u7a2e\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\uff0c\u662f\u4e00\u7a2e\u5f9e\u4e00\u5927\u7fa4\u8cc7\u6599\u4e2d\u53bb\u5b78\u7fd2\u627e\u51fa\u89e3\u6c7a\u554f\u984c\u7684\u65b9\u6cd5\u3002\u7c21\u55ae\u4f86\u8aaa\u4f60\u53ea\u8981\u5c07\u5927\u91cf\u7684\u8cc7\u6599\u9935\u7d66\u96fb\u8166\uff0c\u6a5f\u5668\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\u6703\u70ba\u4f60\u91cf\u8eab\u6253\u9020\u5b78\u7fd2\u51fa\u4e00\u500b\u7279\u5b9a\u7684\u6a21\u578b\u7d66\u4f60\uff0c\u800c\u4e0d\u662f\u518d\u900f\u904e\u4eba\u985e\u624b\u52d5\u7684\u7d66\u4e88\u898f\u5247\u3002\u900f\u904e\u4e00\u5806\u8cc7\u6599\u6709\u6a19\u7c64\u7d66\u7b54\u6848\uff0c\u4e26\u5f9e\u8cc7\u6599\u96c6\u5b78\u7fd2\u8207\u6a19\u8a18\u9593\u7684\u95dc\u806f\uff0c\u6700\u5f8c\u518d\u5f9e\u975e\u7279\u5b9a\u8cc7\u6599\u53bb\u8fa8\u8a8d\u7b54\u6848\u3002","title":"\u4f55\u8b02\u6a5f\u5668\u5b78\u7fd2?"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#_3","text":"\u5176\u5be6\u4eba\u5de5\u667a\u6167\u7684\u61c9\u7528\u5728\u73fe\u5be6\u751f\u6d3b\u4e2d\u96a8\u8655\u53ef\u898b\uff0c\u5f9e\u88fd\u9020\u3001\u91ab\u7642\u3001\u91d1\u878d\u30
01\u4ea4\u901a\u3001\u5b89\u9632\u3001 \u96f6\u552e\u3001\u7269\u6d41\u3001\u8fb2\u696d......\u7b49\u90fd\u53ef\u4ee5\u770b\u5230\u8207 AI \u7684\u76f8\u95dc\u61c9\u7528\u3002\u7576\u7136\u4eba\u5de5\u667a\u6167\u7684\u51fa\u73fe\u4e26\u4e0d\u662f\u66c7\u82b1\u4e00\u73fe\uff0cArtificial Intelligence \u9019\u4e00\u8a5e\u5176\u5be6\u65e9\u5728 20 \u4e16\u7d00\u4e2d\u5c31\u88ab\u63d0\u51fa\uff0c\u8d77\u521d\u7576\u7136\u4e0d\u88ab\u770b\u597d\u751a\u81f3\u5927\u5bb6\u90fd\u89ba\u5f97\u8981\u4e00\u500b\u6a5f\u5668\u4eba\u5b78\u6703\u4eba\u985e\u7684\u667a\u6167\u662f\u5929\u65b9\u591c\u8b5a\u7684\u4e8b\u60c5\uff0c\u4e2d\u9593\u4e5f\u7d93\u6b77\u597d\u5e7e\u6b21 AI \u5bd2\u51ac\uff0c\u73fe\u5728\u56de\u904e\u982d\u4f86\u770b AI \u7684\u7814\u7a76\u9818\u57df\u8d77\u4f0f\u4f0f\u3002\u4e0d\u904e\u96a8\u8457\u8edf\u786c\u9ad4\u7684\u9032\u6b65\uff0c\u9010\u6f38\u4f7f\u5f97\u9700\u8981\u5927\u91cf\u8a08\u7b97\u7684\u4eba\u5de5\u667a\u6167\u6280\u8853\u6162\u6162\u7684\u88ab\u6316\u6398\u51fa\u4f86\u3002\u8fd1\u5e74\u4f86 AI \u65b0\u5275\u5982\u8207\u6625\u7b4d\u822c\u5192\u51fa\uff0c\u667a\u6167\u6a5f\u5668\u4eba\u3001\u611f\u77e5\u8b58\u5225\u3001\u81ea\u7136\u8a9e\u8a00\u8655\u7406\u3001\u5c0d\u8a71\u5ba2\u670d\u3001\u81ea\u52d5\u99d5\u99db\u3001\u7455\u75b5\u6aa2\u6e2c\u3001\u9810\u9632\u6027\u7dad\u4fee\u3001\u81ea\u52d5\u6d41\u7a0b\u63a7\u5236\u3001\u539f\u6599\u7d44\u5408\u6700\u4f73\u5316......\u7b49\u3002","title":"\u4eba\u5de5\u667a\u6167\u7684\u7bc4\u7587"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#_4","text":"\u5176\u5be6\u4eba\u5de5\u667a\u6167\u9019\u9805\u9818\u57df\u53c8\u5206\u6210\u5f88\u591a\u9580\u6d3e\uff0c\u5f9e\u6700\u65e9\u7684\u7b26\u865f\u908f\u8f2f\u3001\u5c08\u5bb6\u7cfb\u7d71\u958b\u59cb\u8aaa\u8d77\u3002\u65e9\u671f\u7684 AI 
\u662f\u5c07\u4eba\u985e\u7684\u5c08\u5bb6\u77e5\u8b58\u900f\u904e\u77e5\u8b58\u5eab\u8207\u898f\u5247\u5eab\u653e\u5230\u6a5f\u5668\u4eba\u7684\u5927\u8166\u4e2d\uff0c\u4e26\u8ce6\u4e88\u6a5f\u5668\u4eba\u667a\u6167\u4f7f\u5f97\u6709\u80fd\u529b\u5224\u65b7\u4e8b\u7269\u3002\u7576\u7136\u4eba\u985e\u5c08\u5bb6\u7684\u77e5\u8b58\u59cb\u7d42\u6709\u9650\uff0c\u96a8\u8457\u7db2\u8def\u8207\u500b\u4eba\u96fb\u8166\u666e\u53ca\u4e26\u9032\u5165\u4e86\u5927\u6578\u64da\u6642\u4ee3\u3002\u5404\u500b\u79d1\u5b78\u5bb6\u65bc\u662f\u958b\u59cb\u601d\u8003\u5982\u4f55\u5c07\u9019\u4e9b\u641c\u96c6\u4f86\u7684\u5927\u91cf\u6578\u64da\u9032\u884c\u61c9\u7528\u8207\u5206\u6790\uff1f\u6a5f\u5668\u5b78\u7fd2\u4e00\u8a5e\u5c31\u51fa\u73fe\u4e86\uff0c\u76ee\u6a19\u662f\u900f\u904e\u73fe\u5be6\u751f\u6d3b\u4e2d\u6240\u6536\u96c6\u7684\u8cc7\u6599\uff0c\u642d\u914d\u5404\u7a2e\u4e0d\u540c\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u8a13\u7df4\u51fa\u4f86\u4e00\u500b\u6a21\u578b\uff0c\u4f7f\u5f97\u6a5f\u5668\u4eba\u6709\u5224\u65b7\u8207\u9810\u6e2c\u7684\u80fd\u529b\u3002\u7576\u7136\u8fd1\u5e7e\u5e74\u71b1\u9580\u7684\u6df1\u5ea6\u5b78\u7fd2\u5176\u5be6\u50c5\u662f\u500b\u6a5f\u5668\u5b78\u7fd2\u88e1\u9762\u7684\u5176\u4e2d\u4e00\u7a2e\u5b78\u7fd2\u7684\u65b9\u6cd5\uff0c\u4ed6\u662f\u6a21\u4eff\u4eba\u985e\u7684\u795e\u7d93\u7cfb\u7d71\uff0c\u900f\u904e\u5927\u91cf\u7684\u795e\u7d93\u5143\u8207\u591a\u5c64\u7684\u795e\u7d93\u7db2\u8def\u5efa\u69cb\u51fa\u4f86\u7684\u8907\u96dc\u6578\u5b78\u6a21\u578b\u3002\u7136\u800c\u5728\u672c\u7cfb\u5217\u6559\u5b78\u4e2d\u6211\u5011\u6703\u5f9e\u6700\u57fa\u790e\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u958b\u59cb\u63d0\u8d77\uff0c\u4e26\u4e00\u6b65\u4e00\u6b65\u7684\u5e36\u9818\u8b80\u8005\u6210\u70ba\u4e00\u4f4d\u771f\u6b63\u7684\u8cc7\u6599\u79d1\u5b78\u5bb6\u3002","title":"\u4ec0\u9ebc\u662f\u4eba\u5de5\u667a\u6167?"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#_5","text":"\u8cc7\u6599\u79d1\u5b78\u4e3b\u8981\u900f\u904e
\u6a5f\u5668\u5b78\u7fd2\u7684\u6280\u8853\uff0c\u8b93\u6a5f\u5668\u53ef\u4ee5\u9810\u6e2c\u6216\u8005\u63a8\u8ad6\u3002\u5176\u4e2d\u9019\u5e7e\u5e74\u5f88\u592f\u7684\u8cc7\u6599\u79d1\u5b78\u5bb6\u9019\u4e00\u540d\u8a5e\u5176\u5be6\u662f\u7531\u4e09\u7a2e\u4eba\u6240\u7d44\u5408\u8d77\u4f86\u7684\u3002\u7b2c\u4e00\u500b\u662f\u6578\u5b78\u8207\u7d71\u8a08\u80cc\u666f\u7684\u4eba\uff0c\u4ed6\u5011\u80fd\u5920\u900f\u904e\u5c0d\u8cc7\u6599\u7684\u654f\u611f\u5ea6\u5f9e\u4e00\u5927\u7fa4\u539f\u59cb\u8cc7\u6599\u4e2d\u63a2\u7d22\u6709\u610f\u7fa9\u7684\u8cc7\u8a0a\u3002\u4e26\u8a2d\u8a08\u4e00\u5957\u9069\u5408\u7684\u6a21\u578b\u70ba\u9019\u4e00\u7fa4\u8cc7\u6599\u9032\u884c\u6578\u64da\u64ec\u5408\u3002\u7b2c\u4e8c\u7a2e\u4eba\u662f\u96fb\u8166\u79d1\u5b78\u80cc\u666f\u7684\u5de5\u7a0b\u5e2b\uff0c\u4ed6\u5011\u64c5\u9577\u7a0b\u5f0f\u8a9e\u8a00\u80fd\u5920\u5c07\u8907\u96dc\u7684\u6578\u5b78\u6a21\u578b\u5920\u904e\u7a0b\u5f0f\u5be6\u4f5c\u4e26\u4e14\u5354\u52a9\u843d\u5730\u6574\u5408\u3002\u7576\u7136\u73fe\u4eca\u6709\u975e\u5e38\u591a\u6a5f\u5668\u5b78\u7fd2\u7684\u5957\u4ef6\u4f8b\u5982 Sklearn\u3001TensorFlow......\u7b49\uff0c\u964d\u4f4e\u4e86\u5927\u5bb6\u5b78\u7fd2\u7684\u9580\u6abb\uff0c\u4e0d\u4e00\u5b9a\u662f\u8981\u7406\u5de5\u80cc\u666f\u7684\u4eba\u90fd\u53ef\u4ee5\u900f\u904e\u9019\u4e9b\u6a5f\u5668\u5b78\u7fd2\u5957\u4ef6\u4e00\u7aba\u4eba\u5de5\u667a\u6167\u7684\u5967\u79d8\u3002\u9664\u6b64\u4e4b\u5916 MLOps \u662f\u8fd1\u5e74\u4f86\u5ef6\u4f38\u51fa\u4f86\u7684\u65b0\u540d\u8a5e\uff0c\u5176\u5be6\u6982\u5ff5\u8207 DevOps \u985e\u4f3c\u4e26\u5c07\u9019\u4e00\u5957\u6a5f\u5236\u8907\u88fd\u5728\u6a5f\u5668\u5b78\u7fd2\u5c08\u6848\u4e0a\uff0c\u6211\u5011\u5e73\u6642\u6240\u57f7\u884c\u7684 AI 
\u5c08\u6848\u5fc5\u9808\u900f\u904e\u6301\u7e8c\u6027\u6574\u5408\u8207\u7dad\u904b\u7684\u89c0\u5ff5\u4e0d\u65b7\u7684\u9031\u671f\u6027\u66f4\u65b0\u5f9e\u6700\u65b0\u6536\u96c6\u5230\u5f97\u6578\u64da\u91cd\u65b0\u5b78\u7fd2\u6a21\u578b\u8d8a\u4f86\u8d8a\u8cbc\u8fd1\u4f7f\u7528\u8005\u3002\u6700\u5f8c\u4e00\u500b\u95dc\u9375\u7684\u4eba\u7269\u5c31\u662f\u5404\u884c\u5404\u696d\u7684\u9818\u57df\u5c08\u5bb6\uff0c\u56e0\u70ba AI \u518d\u4e5f\u4e0d\u662f\u8cc7\u8a0a\u80cc\u666f\u4eba\u7684\u5c08\u5229\u3002\u6211\u5011\u53ef\u4ee5\u5920\u904e AI \u89e3\u6c7a\u65e5\u5e38\u751f\u6d3b\u4e2d\u7684\u554f\u984c\uff0c\u56e0\u6b64\u6211\u5011\u5fc5\u9808\u8207\u9818\u57df\u5c08\u5bb6\u9032\u884c\u5408\u4f5c\u5354\u52a9\u8cc7\u6599\u6e05\u8207\u8207\u5efa\u7acb\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u3002\u7e3d\u4e4b\u8981\u6210\u70ba\u4e00\u500b\u597d\u7684\u8cc7\u79d1\u5b78\u5bb6\u4e0a\u8ff0\u4e09\u7a2e\u4eba\u7684\u7279\u6027\u7f3a\u4e00\u4e0d\u53ef\u3002","title":"\u8cc7\u6599\u79d1\u5b78 \u2715 \u4e09\u528d\u5ba2"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#_6","text":"\u6a5f\u5668\u5b78\u7fd2\u662f\u4e00\u7a2e\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\uff0c\u662f\u4e00\u7a2e\u5f9e\u8cc7\u6599\u4e2d\u53bb\u5b78\u7fd2\u4e26\u627e\u51fa\u89e3\u6c7a\u65b9\u6cd5\u3002\u5176\u4f9d\u7167\u6a5f\u5668\u5b78\u7fd2\u7684\u7a2e\u985e\u5927\u81f4\u53ef\u4ee5\u5206\u6210\u4ee5\u4e0b\u5e7e\u985e\uff1a \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2 \u7121\u9700\u6a19\u7c64/\u7b54\u6848\u5373\u53ef\u5b78\u7fd2 Ex: \u96c6\u7fa4 (Clustering) \u76e3\u7763\u5f0f\u5b78\u7fd2 \u9700\u8981\u6a19\u7c64/\u7b54\u6848\u624d\u80fd\u5b78\u7fd2 Ex: \u5206\u985e (Classification) \u3001 \u56de\u6b78 (Regression) \u534a\u76e3\u7763\u5f0f\u5b78\u7fd2 \u81ea\u76e3\u7763\u5b78\u7fd2 \u5f37\u5316\u5b78\u7fd2 
\u5982\u4f55\u64f7\u53d6\u597d\u7684\u7279\u5fb5\u662f\u5728\u6a5f\u5668\u5b78\u7fd2\u4e2d\u5f88\u91cd\u8981\u7684\u4e00\u4ef6\u4e8b","title":"\u6a5f\u5668\u5b78\u7fd2\u7a2e\u985e"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#_7","text":"\u7c21\u55ae\u4f86\u8aaa\u6a5f\u5668\u5b78\u7fd2\u5c31\u662f\u8981\u5f9e\u4e00\u5927\u7fa4\u8cc7\u6599\u7576\u4e2d\u627e\u51fa\u4e00\u500b\u6578\u5b78\u6a21\u578b\u3002\u9019\u500b\u6578\u5b78\u6a21\u578b\u53ef\u4ee5\u7a31\u4f5c\u662f\u4e00\u500b f(x)=y \u5176\u4e2d x \u70ba\u8f38\u5165\u7684\u8cc7\u6599\uff0cy \u70ba\u8a72\u7b46\u8cc7\u6599\u6240\u76f8\u5c0d\u61c9\u7684\u8f38\u51fa\u3002\u5176\u4e2d f \u5373\u70ba\u51fd\u6578\uff0c\u4e5f\u5c31\u662f\u4efb\u4e00\u7a2e\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u3002\u81f3\u65bc\u5178\u578b\u7684\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u6709\u54ea\u4e9b\u5462\uff1f\u4f8b\u5982\u7dda\u6027\u8ff4\u6b78\u3001\u908f\u8f2f\u56de\u6b78\u3001KNN\u3001SVM\u3001\u6c7a\u7b56\u6a39\u3001\u96a8\u6a5f\u68ee\u6797\u3001XGBoost......\u7b49\u3002\u4e4b\u5f8c\u7684\u7cfb\u5217\u6587\u7ae0\u90fd\u6703\u4f9d\u5e8f\u5411\u5404\u4f4d\u89e3\u91cb\u3002","title":"\u5f9e\u4eba\u985e\u5b78\u7fd2\u5230\u6a5f\u5668\u5b78\u7fd2"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#_8","text":"\u4e00\u822c\u4f86\u8aaa\u8cc7\u6599\u53ef\u4ee5\u5206\u6210\u5169\u500b\u90e8\u5206\u3002\u4ee5\u4e00\u500b\u5206\u985e\u7684\u554f\u984c\u4f86\u8aaa\uff0c\u5206\u5225\u6709\u8f38\u5165\u7684\u7279\u5fb5\u4ee5\u53ca\u8a72\u7b46\u8cc7\u6599\u76f8\u5c0d\u61c9\u7684\u7b54\u6848\u7a31\u4f5c\u6a19\u8a18\u3002AI 
\u9019\u500b\u9818\u57df\u5c31\u662f\u8b93\u6a5f\u5668\u6709\u5b78\u7fd2\u89e3\u6c7a\u554f\u984c\u7684\u80fd\u529b\uff0c\u800c\u4e0d\u662f\u6211\u5011\u544a\u8a34\u4ed6\u61c9\u8a72\u600e\u9ebc\u89e3\u6c7a\u554f\u984c\u3002\u6211\u5011\u8209\u4e00\u500b\u7c21\u55ae\u7684\u4f8b\u5b50\uff0c\u5047\u8a2d\u6211\u5011\u9700\u8981\u9810\u6e2c\u660e\u5929\u662f\u5426\u6703\u4e0b\u96e8\u3002\u6211\u5011\u7684\u8f38\u5165\u7279\u5fb5\u5c31\u53ef\u4ee5\u6709\u5404\u500b\u89c0\u6e2c\u7ad9\u7684\u96f2\u91cf\u8207\u6eab\u6fd5\u5ea6\u4f5c\u70ba\u6a21\u578b\u8a13\u7df4\u7684\u8cc7\u6599\u3002\u800c\u6bcf\u4e00\u7b46\u7684\u5929\u6c23\u8cc7\u8a0a\u90fd\u5c0d\u61c9\u8457\u662f\u5426\u6703\u4e0b\u96e8\u7684\u6a19\u6e96\u7b54\u6848\u3002 \u7279\u5fb5 (Feature): \u7528\u4f86\u63cf\u8ff0\u6bcf\u4e00\u7b46\u8cc7\u6599\uff0c\u901a\u5e38\u6703\u7528 X \u4f86\u8868\u793a \u6a19\u8a18 (Label): \u7528\u4f86\u8868\u793a\u6bcf\u4e00\u7b46\u8cc7\u6599\u6240\u5c0d\u61c9\u7684\u8f38\u51fa\uff0c\u9019\u500b\u8f38\u51fa\u6a23\u5f0f\u53ef\u4ee5\u6709\u4e0d\u540c\u7684\u72c0\u614b(\u53ef\u80fd\u662f\u985e\u5225\u6216\u8005\u5be6\u6578\u503c\u7b49)\uff0c\u901a\u5e38\u6703\u7528 Y 
\u4f86\u8868\u793a\u3002","title":"\u4ec0\u9ebc\u662f\u8cc7\u6599?"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#_9","text":"\u5b8c\u6574\u7684\u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b\u5927\u81f4\u5206\u6210\u516b\u500b\u6b65\u9a5f\u3002\u9996\u5148\u5b9a\u7fa9\u554f\u984c\uff0c\u7d93\u904e\u9700\u6c42\u8a0e\u8ad6\u8207\u8a55\u4f30\u5f8c\u6709\u500b\u660e\u78ba\u7684\u76ee\u6a19\u4e26\u958b\u59cb\u57f7\u884c\u5c08\u6848\u3002\u63a5\u8457\u958b\u59cb\u641c\u96c6\u8cc7\u6599\uff0c\u7531\u65bc\u5404\u5834\u57df\u6240\u6536\u96c6\u5230\u7684\u539f\u59cb\u6578\u64da\u53ef\u80fd\u5c1a\u672a\u6574\u7406\u4ee5\u53ca\u683c\u5f0f\u5c1a\u672a\u7d71\u4e00\u3002\u56e0\u6b64\u7b2c\u4e09\u6b65\u7684\u8cc7\u6599\u6e05\u7406\u6975\u70ba\u91cd\u8981\uff0c\u6709\u500b\u4e7e\u6de8\u7684\u8cc7\u6599\u53ef\u4ee5\u5c0d\u6a21\u578b\u8868\u73fe\u6709\u5927\u5e45\u7684\u63d0\u5347\u3002\u8cc7\u6599\u4e00\u5207\u5c31\u7dd2\u5f8c\u5efa\u8b70\u5728\u5efa\u6a21\u4e4b\u524d\u5148\u5c0d\u8cc7\u6599\u9032\u884c\u8996\u89ba\u5316\u5206\u6790\uff0c\u4e26\u70ba\u6578\u64da\u505a\u524d\u8655\u7406\u4ee5\u53ca\u5c08\u696d\u77e5\u8b58\u7684\u7279\u5fb5\u5de5\u7a0b\u3002\u5c0d\u8cc7\u6599\u6709\u521d\u6b65\u7684\u8a8d\u8b58\u5f8c\uff0c\u63a5\u8457\u6311\u9078\u5408\u9069\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u8a13\u7df4\u8207\u8a55\u4f30\u6a21\u578b\u3002\u5728\u6a21\u578b\u6b63\u5f0f\u4e0a\u7dda\u4e4b\u524d\uff0c\u5148\u900f\u904e\u6e2c\u8a66\u96c6\u6216\u662f\u4ea4\u53c9\u9a57\u8b49\u7b49\u6a5f\u5236\u78ba\u8a8d\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u3002\u6a21\u578b\u78ba\u8a8d\u6c92\u6709\u554f\u984c\u5f8c\u5373\u53ef\u5c07\u6a21\u578b\u6253\u5305\u8f38\u51fa\uff0c\u4e26\u4e14\u8207\u5be6\u969b\u5834\u57df\u61c9\u7528\u9032\u884c\u6574\u5408\u3002\u6700\u7d42\u5c31\u662f\u90e8\u7f72\u6a21\u578b\u4ee5\u53ca\u7dad\u904b\uff0c\u6301\u7e8c\u5c07\u5834\u57df\u8490\u96c6\u5230\u7684\u65b0\u8cc7\u6599\u9032\u884c\u518d\u8a13\u7df4\uff0c\u5f62\u6210\u4e00\u500b\u958b\u767c\u5faa\u74b0\u3002
 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/","text":"[Day 6] \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2 K-means \u5206\u7fa4 \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2 \u4f55\u8b02\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2? \u96c6\u7fa4\u5206\u6790? \u5206\u7fa4\u6f14\u7b97\u6cd5\u4ecb\u7d39 K-means \u5206\u7fa4\u5206\u985e\u6f14\u7b97\u6cd5 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2(Un-supervised learning) \u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u6c92\u6709\u6240\u8b02\u7684\u6a19\u6e96\u7b54\u6848\uff0c\u6545\u6a5f\u5668\u6703\u81ea\u5df1\u5f9e\u8cc7\u6599\u7fa4\u4e2d\u627e\u51fa\u4e00\u5957\u5206\u7fa4\u7684\u6cd5\u5247\u3002\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u7684\u512a\u9ede\u662f\u4e0d\u9700\u8981\u4e8b\u5148\u4ee5\u4eba\u529b\u6a19\u7c64\uff0c\u53ea\u7d66\u5b9a\u7279\u5fb5\u8b93\u6a5f\u5668\u60f3\u8fa6\u6cd5\u6703\u5f9e\u4e2d\u627e\u51fa\u898f\u5f8b\u3002\u5e38\u898b\u7684\u975e\u76e3\u7763\u5f0f\u7684\u5206\u7fa4\u6f14\u7b97\u6cd5\u6709 K-means\uff0c\u5b83\u6839\u64da\u7269\u4ee5\u985e\u805a\u7684\u539f\u7406\u76ee\u6a19\u662f\u6839\u64da\u7279\u5fb5\u628a\u8cc7\u6599\u6a23\u672c\u5206\u70ba K \u7fa4\u3002\u5176\u4e2d\u5728\u8a13\u7df4\u6a21\u578b\u6642\u50c5\u9808\u5c0d\u6a5f\u5668\u63d0\u4f9b\u8f38\u5165\u7684\u7279\u5fb5\uff0c\u4e26\u5229\u7528\u5206\u7fa4\u6f14\u7b97\u6cd5\u81ea\u52d5\u5f9e\u9019\u4e9b\u7279\u5fb5\u4e2d\u627e\u51fa\u9130\u8fd1\u7684\u96c6\u7fa4\u4e2d\u5fc3\u4f5c\u70ba\u8a72\u985e\u5225\u3002 K-means \u6f14\u7b97\u6cd5 \u900f\u904e\u5206\u7fa4\u5206\u985e\u6f14\u7b97\u6cd5\u6211\u5011\u80fd\u5920\u5c07\u591a\u7a2e\u7dad\u5ea6\u7684\u8cc7\u6599\u9032\u884c\u5206\u985e\u3002K-means 
\u6f14\u7b97\u6cd5\u7684\u6982\u5ff5\u5f88\u7c21\u55ae\u4e5f\u975e\u5e38\u5bb9\u6613\u5be6\u4f5c\uff0c\u50c5\u4e00\u822c\u52a0\u6e1b\u4e58\u9664\u5c31\u597d\u4e0d\u9700\u8907\u96dc\u7684\u8a08\u7b97\u516c\u5f0f\u3002 \u521d\u59cb\u5316: \u6307\u5b9a K \u500b\u5206\u7fa4\uff0c\u4e26\u96a8\u6a5f\u6311\u9078 K \u500b\u8cc7\u6599\u9ede\u7684\u503c\u7576\u4f5c\u7fa4\u7d44\u4e2d\u5fc3\u503c \u5206\u914d\u8cc7\u6599\u9ede: \u5c07\u6bcf\u500b\u8cc7\u6599\u9ede\u8a2d\u70ba\u8ddd\u96e2\u6700\u8fd1\u7684\u4e2d\u5fc3 \u8a08\u7b97\u5e73\u5747\u503c: \u91cd\u65b0\u8a08\u7b97\u6bcf\u500b\u5206\u7fa4\u7684\u4e2d\u5fc3\u9ede \u91cd\u8907\u6b65\u9a5f2\u30013\uff0c\u76f4\u5230\u8cc7\u6599\u9ede\u4e0d\u518d\u8b8a\u63db\u7fa4\u7d44\u70ba\u6b62 [\u7a0b\u5f0f\u5be6\u4f5c] \u8f09\u5165\u76f8\u95dc\u5957\u4ef6 import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris 1) \u8f09\u5165\u8cc7\u6599\u96c6 \u6211\u5011\u4eca\u5929\u8981\u5be6\u4f5c\u5206\u7fa4\u5206\u985e\u7684\u554f\u984c\uff0c\u56e0\u6b64\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u975e\u5e38\u9069\u5408\u7576\u4f5c\u7bc4\u4f8b\u3002\u5176\u8cc7\u6599\u96c6\u8f09\u5165\u65b9\u5f0f\u5728\u7b2c\u56db\u5929\u6709\u63d0\u904e\uff0c\u662f\u4e00\u6a23\u7684\u5167\u5bb9\uff01 iris = load_iris () df_data = pd . DataFrame ( data = np . 
c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data K-Means K-means \u6f14\u7b97\u6cd5\u5728 Sklearn \u5957\u4ef6\u4e2d\u5df2\u7d93\u5e6b\u6211\u5011\u5c01\u88dd\u597d\u4e86\uff0c\u4f7f\u7528\u8005\u53ea\u8981\u547c\u53eb API \u5373\u53ef\u5c07\u5206\u7fa4\u5206\u985e\u6f14\u7b97\u6cd5\u5feb\u901f\u5be6\u4f5c\u3002 Parameters: - n_cluster: K\u7684\u5927\u5c0f\uff0c\u4e5f\u5c31\u662f\u5206\u7fa4\u7684\u985e\u5225\u6578\u91cf\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u8a2d\u5b9a\u5e38\u6578\u80fd\u5920\u4fdd\u8b49\u6bcf\u6b21\u5206\u7fa4\u7d50\u679c\u90fd\u4e00\u6a23\u3002 - n_init: \u9810\u8a2d\u70ba10\u6b21\u96a8\u6a5f\u521d\u59cb\u5316\uff0c\u9078\u64c7\u6548\u679c\u6700\u597d\u7684\u4e00\u7a2e\u4f86\u4f5c\u70ba\u6a21\u578b\u3002 - max_iter: \u8fed\u4ee3\u6b21\u6578\uff0c\u9810\u8a2d\u70ba300\u4ee3\u3002 Attributes: - inertia_: inertia_\uff1afloat\uff0c\u6bcf\u500b\u9ede\u5230\u5176\u4ed6\u53e2\u96c6\u7684\u8cea\u5fc3\u7684\u8ddd\u96e2\u4e4b\u548c\u3002 - cluster_centers_\uff1a \u7279\u5fb5\u7684\u4e2d\u5fc3\u9ede [n_clusters, n_features] \u3002 Methods: - fit: K\u500b\u96c6\u7fa4\u5206\u985e\u6a21\u578b\u8a13\u7df4\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u985e\u5225\u3002 - fit_predict: \u5148\u547c\u53ebfit()\u505a\u96c6\u7fa4\u5206\u985e\uff0c\u4e4b\u5f8c\u5728\u547c\u53ebpredict()\u9810\u6e2c\u6700\u7d42\u985e\u5225\u4e26\u56de\u50b3\u8f38\u51fa\u3002 - transform: \u56de\u50b3\u7684\u9663\u5217\u6bcf\u4e00\u884c\u662f\u6bcf\u4e00\u500b\u6a23\u672c\u5230kmeans\u4e2d\u5404\u500b\u4e2d\u5fc3\u9ede\u7684L2(\u6b50\u5e7e\u91cc\u5f97)\u8ddd\u96e2\u3002 - fit_transform: \u5148\u547c\u53ebfit()\u518d\u57f7\u884ctransform()\u3002 from sklearn.cluster import KMeans kmeansModel = KMeans ( n_clusters = 3 , random_state = 46 ) clusters_pred = kmeansModel . 
fit_predict ( X ) \u8a55\u4f30\u5206\u7fa4\u7d50\u679c \u4f7f\u7528\u8005\u8a2d\u5b9a K \u500b\u5206\u7fa4\u5f8c\uff0c\u8a72\u6f14\u7b97\u6cd5\u5feb\u901f\u7684\u627e\u5230 K \u500b\u4e2d\u5fc3\u9ede\u4e26\u5b8c\u6210\u5206\u7fa4\u5206\u985e\u3002\u64ec\u5408\u597d\u6a21\u578b\u5f8c\u6211\u5011\u53ef\u4ee5\u8a08\u7b97\u5404\u500b\u6a23\u672c\u5230\u8a72\u7fa4\u7684\u4e2d\u5fc3\u9ede\u7684\u8ddd\u96e2\u4e4b\u5e73\u65b9\u548c\uff0c\u7528\u4f86\u8a55\u4f30\u96c6\u7fa4\u7684\u6210\u6548\uff0c\u5176 inertia \u8d8a\u5927\u4ee3\u8868\u8d8a\u5dee\u3002 kmeansModel . inertia_ \u8f38\u51fa\u7d50\u679c\uff1a 78.94084142614602 \u82e5\u8981\u67e5\u770b\u5404\u7fa4\u96c6\u7684\u4e2d\u5fc3\u9ede\uff0c\u53ef\u4ee5\u53c3\u8003\u4ee5\u4e0b\u7a0b\u5f0f\u78bc\u3002 kmeansModel . cluster_centers_ \u8f38\u51fa\u7d50\u679c\uff1a array([[5.9016129 , 2.7483871 , 4.39354839, 1.43387097], [5.006 , 3.428 , 1.462 , 0.246 ], [6.85 , 3.07368421, 5.74210526, 2.07105263]]) \u5206\u985e\u7d50\u679c \u5982\u4f55\u6c7a\u5b9aK? 
\u7576\u4f60\u624b\u908a\u6709\u4e00\u7fa4\u8cc7\u6599\uff0c\u4e14\u7121\u6cd5\u4e00\u773c\u770b\u51fa\u6709\u591a\u5c11\u500b\u4e2d\u5fc3\u7684\u72c0\u6cc1\u3002\u53ef\u7528\u4f7f\u7528\u4e0b\u9762\u5169\u7a2e\u65b9\u6cd5\u505a k-means \u6a21\u578b\u8a55\u4f30\u3002 Inertia \u8a08\u7b97\u6240\u6709\u9ede\u5230\u6bcf\u7fa4\u96c6\u4e2d\u5fc3\u8ddd\u96e2\u7684\u5e73\u65b9\u548c\u3002 silhouette scores \u5074\u5f71\u51fd\u6578\u9a57\u8b49\u6578\u64da\u96c6\u7fa4\u5167\u4e00\u81f4\u6027\u7684\u65b9\u6cd5\u3002 \u4f7f\u7528 inertia \u505a\u6a21\u578b\u8a55\u4f30 \u7576K\u503c\u8d8a\u4f86\u8d8a\u5927\uff0cinertia \u6703\u96a8\u4e4b\u8d8a\u4f86\u8d8a\u5c0f\u3002\u6b63\u5e38\u60c5\u6cc1\u4e0b\u4e0d\u6703\u53d6K\u6700\u5927\u7684\uff0c\u4e00\u822c\u662f\u53d6 elbow point \u9644\u8fd1\u4f5c\u70ba K\uff0c\u5373 inertia \u8fc5\u901f\u4e0b\u964d\u8f49\u70ba\u5e73\u7de9\u7684\u90a3\u500b\u9ede\u3002 # k = 1~9 \u505a9\u6b21kmeans, \u4e26\u5c07\u6bcf\u6b21\u7d50\u679c\u7684inertia\u6536\u96c6\u5728\u4e00\u500blist\u88e1 kmeans_list = [ KMeans ( n_clusters = k , random_state = 46 ) . fit ( X ) for k in range ( 1 , 10 )] inertias = [ model . inertia_ for model in kmeans_list ] \u4f7f\u7528 silhouette scores \u505a\u6a21\u578b\u8a55\u4f30 \u53e6\u5916\u4e00\u500b\u65b9\u6cd5\u662f\u7528 silhouette scores \u53bb\u8a55\u4f30\uff0c\u5176\u5206\u6578\u8d8a\u5927\u4ee3\u8868\u5206\u7fa4\u6548\u679c\u8d8a\u597d\u3002 from sklearn.metrics import silhouette_score silhouette_scores = [ silhouette_score ( X , model . 
labels_ ) for model in kmeans_list [ 1 :]] \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 6] \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2 K-means \u5206\u7fa4"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#day-6-k-means","text":"","title":"[Day 6] \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2 K-means \u5206\u7fa4"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#_1","text":"\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2 \u4f55\u8b02\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2? \u96c6\u7fa4\u5206\u6790? \u5206\u7fa4\u6f14\u7b97\u6cd5\u4ecb\u7d39 K-means \u5206\u7fa4\u5206\u985e\u6f14\u7b97\u6cd5 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#un-supervised-learning","text":"\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u6c92\u6709\u6240\u8b02\u7684\u6a19\u6e96\u7b54\u6848\uff0c\u6545\u6a5f\u5668\u6703\u81ea\u5df1\u5f9e\u8cc7\u6599\u7fa4\u4e2d\u627e\u51fa\u4e00\u5957\u5206\u7fa4\u7684\u6cd5\u5247\u3002\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u7684\u512a\u9ede\u662f\u4e0d\u9700\u8981\u4e8b\u5148\u4ee5\u4eba\u529b\u6a19\u7c64\uff0c\u53ea\u7d66\u5b9a\u7279\u5fb5\u8b93\u6a5f\u5668\u60f3\u8fa6\u6cd5\u6703\u5f9e\u4e2d\u627e\u51fa\u898f\u5f8b\u3002\u5e38\u898b\u7684\u975e\u76e3\u7763\u5f0f\u7684\u5206\u7fa4\u6f14\u7b97\u6cd5\u6709 K-means\uff0c\u5b83\u6839\u64da\u7269\u4ee5\u985e\u805a\u7684\u539f\u7406\u76ee\u6a19\u662f\u6839\u64da\u7279\u5fb5\u628a\u8cc7\u6599\u6a23\u672c\u5206\u70ba K \u7fa4\u3002\u5176\u4e2d\u5728\u8a13\u7df4\u6a21\u578b\u6642\u50c5\u9808\u5c0d\u6a5f\u5668\u63d0\u4f9b\u8f38\u5165\u7684\u7279\u5fb5\uff0c\u4e26\u5229\u7528\u5206\u7fa4\u6f14\u7b97\u6cd5\u81ea\u52d5\u5f9e\u9019\u4e9b\u7279\u5fb5\u4e2d\u627e\u51fa\u9130\u8fd1\u7684\u96c6\u7fa4\u4e2d\u5fc3\u4f5c\u70ba\u8a72\u985e\u5225\u3002","title":"\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2(Un-supervised 
learning)"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#k-means","text":"\u900f\u904e\u5206\u7fa4\u5206\u985e\u6f14\u7b97\u6cd5\u6211\u5011\u80fd\u5920\u5c07\u591a\u7a2e\u7dad\u5ea6\u7684\u8cc7\u6599\u9032\u884c\u5206\u985e\u3002K-means \u6f14\u7b97\u6cd5\u7684\u6982\u5ff5\u5f88\u7c21\u55ae\u4e5f\u975e\u5e38\u5bb9\u6613\u5be6\u4f5c\uff0c\u50c5\u4e00\u822c\u52a0\u6e1b\u4e58\u9664\u5c31\u597d\u4e0d\u9700\u8907\u96dc\u7684\u8a08\u7b97\u516c\u5f0f\u3002 \u521d\u59cb\u5316: \u6307\u5b9a K \u500b\u5206\u7fa4\uff0c\u4e26\u96a8\u6a5f\u6311\u9078 K \u500b\u8cc7\u6599\u9ede\u7684\u503c\u7576\u4f5c\u7fa4\u7d44\u4e2d\u5fc3\u503c \u5206\u914d\u8cc7\u6599\u9ede: \u5c07\u6bcf\u500b\u8cc7\u6599\u9ede\u8a2d\u70ba\u8ddd\u96e2\u6700\u8fd1\u7684\u4e2d\u5fc3 \u8a08\u7b97\u5e73\u5747\u503c: \u91cd\u65b0\u8a08\u7b97\u6bcf\u500b\u5206\u7fa4\u7684\u4e2d\u5fc3\u9ede \u91cd\u8907\u6b65\u9a5f2\u30013\uff0c\u76f4\u5230\u8cc7\u6599\u9ede\u4e0d\u518d\u8b8a\u63db\u7fa4\u7d44\u70ba\u6b62","title":"K-means \u6f14\u7b97\u6cd5"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#_2","text":"","title":"[\u7a0b\u5f0f\u5be6\u4f5c]"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#_3","text":"import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris","title":"\u8f09\u5165\u76f8\u95dc\u5957\u4ef6"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#1","text":"\u6211\u5011\u4eca\u5929\u8981\u5be6\u4f5c\u5206\u7fa4\u5206\u985e\u7684\u554f\u984c\uff0c\u56e0\u6b64\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u975e\u5e38\u9069\u5408\u7576\u4f5c\u7bc4\u4f8b\u3002\u5176\u8cc7\u6599\u96c6\u8f09\u5165\u65b9\u5f0f\u5728\u7b2c\u56db\u5929\u6709\u63d0\u904e\uff0c\u662f\u4e00\u6a23\u7684\u5167\u5bb9\uff01 iris = load_iris () df_data = pd . DataFrame ( data = np . 
c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data","title":"1) \u8f09\u5165\u8cc7\u6599\u96c6"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#k-means_1","text":"K-means \u6f14\u7b97\u6cd5\u5728 Sklearn \u5957\u4ef6\u4e2d\u5df2\u7d93\u5e6b\u6211\u5011\u5c01\u88dd\u597d\u4e86\uff0c\u4f7f\u7528\u8005\u53ea\u8981\u547c\u53eb API \u5373\u53ef\u5c07\u5206\u7fa4\u5206\u985e\u6f14\u7b97\u6cd5\u5feb\u901f\u5be6\u4f5c\u3002 Parameters: - n_cluster: K\u7684\u5927\u5c0f\uff0c\u4e5f\u5c31\u662f\u5206\u7fa4\u7684\u985e\u5225\u6578\u91cf\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u8a2d\u5b9a\u5e38\u6578\u80fd\u5920\u4fdd\u8b49\u6bcf\u6b21\u5206\u7fa4\u7d50\u679c\u90fd\u4e00\u6a23\u3002 - n_init: \u9810\u8a2d\u70ba10\u6b21\u96a8\u6a5f\u521d\u59cb\u5316\uff0c\u9078\u64c7\u6548\u679c\u6700\u597d\u7684\u4e00\u7a2e\u4f86\u4f5c\u70ba\u6a21\u578b\u3002 - max_iter: \u8fed\u4ee3\u6b21\u6578\uff0c\u9810\u8a2d\u70ba300\u4ee3\u3002 Attributes: - inertia_: inertia_\uff1afloat\uff0c\u6bcf\u500b\u9ede\u5230\u5176\u4ed6\u53e2\u96c6\u7684\u8cea\u5fc3\u7684\u8ddd\u96e2\u4e4b\u548c\u3002 - cluster_centers_\uff1a \u7279\u5fb5\u7684\u4e2d\u5fc3\u9ede [n_clusters, n_features] \u3002 Methods: - fit: K\u500b\u96c6\u7fa4\u5206\u985e\u6a21\u578b\u8a13\u7df4\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u985e\u5225\u3002 - fit_predict: \u5148\u547c\u53ebfit()\u505a\u96c6\u7fa4\u5206\u985e\uff0c\u4e4b\u5f8c\u5728\u547c\u53ebpredict()\u9810\u6e2c\u6700\u7d42\u985e\u5225\u4e26\u56de\u50b3\u8f38\u51fa\u3002 - transform: \u56de\u50b3\u7684\u9663\u5217\u6bcf\u4e00\u884c\u662f\u6bcf\u4e00\u500b\u6a23\u672c\u5230kmeans\u4e2d\u5404\u500b\u4e2d\u5fc3\u9ede\u7684L2(\u6b50\u5e7e\u91cc\u5f97)\u8ddd\u96e2\u3002 - fit_transform: \u5148\u547c\u53ebfit()\u518d\u57f7\u884ctransform()\u3002 from sklearn.cluster import KMeans kmeansModel = KMeans ( n_clusters = 3 , random_state = 46 ) 
clusters_pred = kmeansModel . fit_predict ( X )","title":"K-Means"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#_4","text":"\u4f7f\u7528\u8005\u8a2d\u5b9a K \u500b\u5206\u7fa4\u5f8c\uff0c\u8a72\u6f14\u7b97\u6cd5\u5feb\u901f\u7684\u627e\u5230 K \u500b\u4e2d\u5fc3\u9ede\u4e26\u5b8c\u6210\u5206\u7fa4\u5206\u985e\u3002\u64ec\u5408\u597d\u6a21\u578b\u5f8c\u6211\u5011\u53ef\u4ee5\u8a08\u7b97\u5404\u500b\u6a23\u672c\u5230\u8a72\u7fa4\u7684\u4e2d\u5fc3\u9ede\u7684\u8ddd\u96e2\u4e4b\u5e73\u65b9\u548c\uff0c\u7528\u4f86\u8a55\u4f30\u96c6\u7fa4\u7684\u6210\u6548\uff0c\u5176 inertia \u8d8a\u5927\u4ee3\u8868\u8d8a\u5dee\u3002 kmeansModel . inertia_ \u8f38\u51fa\u7d50\u679c\uff1a 78.94084142614602 \u82e5\u8981\u67e5\u770b\u5404\u7fa4\u96c6\u7684\u4e2d\u5fc3\u9ede\uff0c\u53ef\u4ee5\u53c3\u8003\u4ee5\u4e0b\u7a0b\u5f0f\u78bc\u3002 kmeansModel . cluster_centers_ \u8f38\u51fa\u7d50\u679c\uff1a array([[5.9016129 , 2.7483871 , 4.39354839, 1.43387097], [5.006 , 3.428 , 1.462 , 0.246 ], [6.85 , 3.07368421, 5.74210526, 2.07105263]])","title":"\u8a55\u4f30\u5206\u7fa4\u7d50\u679c"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#_5","text":"","title":"\u5206\u985e\u7d50\u679c"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#k","text":"\u7576\u4f60\u624b\u908a\u6709\u4e00\u7fa4\u8cc7\u6599\uff0c\u4e14\u7121\u6cd5\u4e00\u773c\u770b\u51fa\u6709\u591a\u5c11\u500b\u4e2d\u5fc3\u7684\u72c0\u6cc1\u3002\u53ef\u7528\u4f7f\u7528\u4e0b\u9762\u5169\u7a2e\u65b9\u6cd5\u505a k-means \u6a21\u578b\u8a55\u4f30\u3002 Inertia \u8a08\u7b97\u6240\u6709\u9ede\u5230\u6bcf\u7fa4\u96c6\u4e2d\u5fc3\u8ddd\u96e2\u7684\u5e73\u65b9\u548c\u3002 silhouette scores \u5074\u5f71\u51fd\u6578\u9a57\u8b49\u6578\u64da\u96c6\u7fa4\u5167\u4e00\u81f4\u6027\u7684\u65b9\u6cd5\u3002","title":"\u5982\u4f55\u6c7a\u5b9aK?"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#inertia","text":"\u7576K\u503c\u8d8a\u4f86\u8d8a\u5927\uff0cinertia 
\u6703\u96a8\u4e4b\u8d8a\u4f86\u8d8a\u5c0f\u3002\u6b63\u5e38\u60c5\u6cc1\u4e0b\u4e0d\u6703\u53d6K\u6700\u5927\u7684\uff0c\u4e00\u822c\u662f\u53d6 elbow point \u9644\u8fd1\u4f5c\u70ba K\uff0c\u5373 inertia \u8fc5\u901f\u4e0b\u964d\u8f49\u70ba\u5e73\u7de9\u7684\u90a3\u500b\u9ede\u3002 # k = 1~9 \u505a9\u6b21kmeans, \u4e26\u5c07\u6bcf\u6b21\u7d50\u679c\u7684inertia\u6536\u96c6\u5728\u4e00\u500blist\u88e1 kmeans_list = [ KMeans ( n_clusters = k , random_state = 46 ) . fit ( X ) for k in range ( 1 , 10 )] inertias = [ model . inertia_ for model in kmeans_list ]","title":"\u4f7f\u7528 inertia \u505a\u6a21\u578b\u8a55\u4f30"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#silhouette-scores","text":"\u53e6\u5916\u4e00\u500b\u65b9\u6cd5\u662f\u7528 silhouette scores \u53bb\u8a55\u4f30\uff0c\u5176\u5206\u6578\u8d8a\u5927\u4ee3\u8868\u5206\u7fa4\u6548\u679c\u8d8a\u597d\u3002 from sklearn.metrics import silhouette_score silhouette_scores = [ silhouette_score ( X , model . labels_ ) for model in kmeans_list [ 1 :]] \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u4f7f\u7528 silhouette scores \u505a\u6a21\u578b\u8a55\u4f30"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/","text":"[Day 7] \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u964d\u7dad\u89c0\u5ff5 \u4f55\u8b02\u964d\u7dad? \u964d\u7dad\u6709\u4ec0\u9ebc\u512a\u9ede? 
\u5e38\u898b\u5169\u7a2e\u964d\u7dad\u65b9\u6cd5 PCA & t-SNE \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u964d\u7dad (Dimension Reduction) \u4e00\u822c\u8cc7\u6599\u5e38\u898b\u7684\u8868\u793a\u65b9\u6cd5\u6709\u4e00\u7dad(\u6578\u7dda)\u3001\u4e8c\u7dad(XY\u5e73\u9762)\u548c\u4e09\u7dad(XYZ\u7acb\u9ad4)\u3002\u7576\u5927\u65bc\u4e09\u7dad\u7684\u8cc7\u6599\u5c31\u96e3\u4ee5\u8996\u89ba\u5316\u5448\u73fe\uff0c\u90a3\u9ebc\u6211\u5011\u8a72\u5982\u4f55\u8868\u793a\u9ad8\u7dad\u5ea6\u7684\u8cc7\u6599\u540c\u6642\u53c8\u4e0d\u80fd\u58d3\u7e2e\u539f\u672c\u8cc7\u6599\u9593\u5f7c\u6b64\u7684\u95dc\u9023\u6027\u5462\uff1f\u9019\u6642\u964d\u7dad\u5c31\u80fd\u5e6b\u52a9\u4f60\u4e86\uff01\u964d\u7dad\u9867\u540d\u601d\u7fa9\uff0c\u5c31\u662f\u539f\u672c\u7684\u8cc7\u6599\u8655\u65bc\u5728\u4e00\u500b\u6bd4\u8f03\u9ad8\u7684\u7dad\u5ea6\u4f5c\u6a19\u4e0a\uff0c\u6211\u5011\u5e0c\u671b\u627e\u5230\u4e00\u500b\u4f4e\u7dad\u5ea6\u7684\u4f5c\u6a19\u4f86\u63cf\u8ff0\u5b83\uff0c\u4f46\u53c8\u4e0d\u80fd\u5931\u53bb\u8cc7\u6599\u672c\u8eab\u7684\u7279\u8cea\u3002 \u70ba\u4ec0\u9ebc\u8981\u964d\u7dad? 
\u60f3\u60f3\u770b\u5982\u679c\u6211\u5011\u80fd\u5920\u628a\u4e00\u4e9b\u8cc7\u6599\u505a\u58d3\u7e2e\uff0c\u540c\u6642\u53c8\u80fd\u5920\u4fdd\u6301\u8cc7\u6599\u539f\u4f86\u7684\u7279\u6027\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u7528\u6bd4\u8f03\u5c11\u7684\u7a7a\u9593\uff0c\u6216\u662f\u8a08\u7b97\u6642\u7528\u6bd4\u8f03\u5c11\u7684\u8cc7\u6e90\u5c31\u53ef\u4ee5\u5f97\u5230\u8ddf\u6c92\u6709\u505a\u8cc7\u6599\u58d3\u7e2e\u4e4b\u524d\u5f97\u5230\u76f8\u4f3c\u7684\u7d50\u679c\u3002\u6b64\u5916\u8cc7\u6599\u964d\u7dad\u53ef\u4ee5\u5e6b\u52a9\u6211\u5011\u9032\u884c\u8cc7\u6599\u8996\u89ba\u5316\uff0c\u4e8c\u7dad\u53ef\u4ee5\u7528\u5e73\u9762\u5716\u8868\u793a\u3001\u4e09\u7dad\u53ef\u4ee5\u7528\u7acb\u9ad4\u5716\u4f5c\u8868\u793a\uff0c\u800c\u5927\u65bc\u4e09\u7dad\u7684\u7a7a\u9593\u96e3\u4ee5\u8996\u89ba\u5316\u505a\u5448\u73fe\u3002 \u964d\u7dad\u6f14\u7b97\u6cd5 \u5e38\u898b\u7684\u964d\u7dad\u65b9\u6cd5\u6709\u5169\u7a2e\u5206\u5225\u6709\u7dda\u6027\u65b9\u6cd5\u7684\u4e3b\u6210\u5206\u5206\u6790(PCA)\u4ee5\u53ca\u975e\u7dda\u6027\u7684 t-\u96a8\u6a5f\u9130\u8fd1\u5d4c\u5165\u6cd5(t-SNE)\u3002\u4e0b\u5716\u4f8b\u5b50\u662f\u5c07 28*28 \u5927\u5c0f\u7684\u624b\u5beb\u6578\u5b57\u7167\u7247\uff0c\u5206\u5225\u900f\u904e\u4e0a\u8ff0\u5169\u7a2e\u964d\u7dad\u65b9\u6cd5\u5c07\u4e00\u5f35 784 \u500b\u50cf\u7d20\u7684\u5f71\u50cf\u964d\u6210 2 \u7dad\u4e26\u6295\u5c04\u5728\u5e73\u9762\u5ea7\u6a19\u4e0a\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe PCA \u964d\u70ba\u5f8c\u53ef\u4ee5\u5927\u81f4\u5c07 0~9 \u7684\u624b\u5beb\u6578\u5b57\u7167\u7247\u5728\u5e73\u9762\u4e0a\u5206\u6210\u5341\u7fa4\uff0c\u4e0d\u904e\u5f7c\u6b64\u9593\u7684\u754c\u7dda\u9084\u662f\u5f88\u6a21\u7cca\u3002\u800c\u6211\u5011\u900f\u904e t-SNE 
\u65b9\u6cd5\u964d\u70ba\u5f8c\u53ef\u4ee5\u770b\u5230\u5e73\u9762\u4e0a\u5f88\u6e05\u695a\u7684\u5c07\u9019\u5341\u500b\u6578\u5b57\u5206\u6210\u5341\u7fa4\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u5f97\u77e5\u624b\u5beb\u6578\u5b57\u7684\u5f71\u50cf\u5728\u975e\u7dda\u6027\u7684\u964d\u7dad\u8f49\u63db\u6548\u679c\u662f\u6bd4\u8f03\u597d\u7684\u3002 Principal component analysis (PCA) T-Distributed Stochastic Neighbor Embedding (t-SNE) \u56e0\u70ba t-SNE \u5141\u8a31\u975e\u7dda\u6027\u7684\u8f49\u63db\uff0c\u6b64\u5916 t-SNE \u4f7f\u7528\u4e86\u66f4\u8907\u96dc\u7684\u516c\u5f0f\u4f86\u8868\u9054\u9ad8\u7dad\u8207\u4f4e\u7dad\u4e4b\u9593\u7684\u95dc\u4fc2\u3002\u56e0\u6b64\u5728\u9019\u7a2e 0~9 \u6709\u5341\u500b\u5206\u985e\u7684\u60c5\u6cc1\u4e0b\u53ef\u4ee5\u78ba\u4fdd\u5f7c\u6b64\u9593\u7684\u8ddd\u96e2\u6703\u88ab\u5340\u9694\u8a72\u800c\u4e0d\u6703\u91cd\u758a\u3002 Principal component analysis (PCA) \u4e3b\u6210\u4efd\u5206\u6790(Principal component analysis, PCA)\u3002\u5176\u4e3b\u8981\u76ee\u7684\u662f\u628a\u9ad8\u7dad\u7684\u9ede\u982d\u5f71\u5230\u4f4e\u7dad\u7684\u7a7a\u9593\u4e0a\uff0c\u4e26\u4e14\u4f4e\u7dad\u5ea6\u7684\u7a7a\u9593\u4fdd\u6709\u9ad8\u7dad\u7a7a\u9593\u4e2d\u5927\u90e8\u5206\u7684\u6027\u8cea\u3002\u900f\u904e\u5c07\u4e00\u500b\u5177\u6709 n \u500b\u7279\u5fb5\u7a7a\u9593\u7684\u6a23\u672c\uff0c\u8f49\u63db\u70ba\u5177\u6709 k \u500b\u7279\u5fb5\u7a7a\u9593\u7684\u6a23\u672c\uff0c\u5176\u4e2d k \u5fc5\u5b9a\u8981\u5c0f\u65bc n\u3002\u6b64\u5916 PCA \u53ea\u5141\u8a31\u7dda\u6027\u7684\u8f49\u63db\u3002\u5982\u4e0b\u5716\u6240\u793a\uff0c\u6211\u5011\u5c07\u6350\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u9032\u884c PCA 
\u964d\u7dad\u3002\u5c07\u539f\u6709\u56db\u500b\u7279\u5fb5\u5206\u5225\u6709\u82b1\u74e3\u8207\u82b1\u843c\u7684\u9577\u8207\u5bec\uff0c\u900f\u904e\u7dda\u6027\u8f49\u63db\u6210\u5169\u7dad\u4e26\u6295\u5c04\u5728\u5e73\u9762\u4e0a\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u4e09\u7a2e\u82b1\u7684\u985e\u5225\u5728\u5e73\u9762\u4e0a\u5404\u81ea\u90fd\u6709\u7dda\u6027\u7684\u8da8\u52e2\uff0c\u4e5f\u5c31\u662f\u5716\u4e2d\u7d05\u8272\u7684\u7dda\u689d\u3002 PCA\u7684\u4e3b\u8981\u6b65\u9a5f \u9996\u5148\u4e00\u958b\u59cb\u5148\u6c42\u51fa\u6240\u6709\u8cc7\u6599\u9ede\u4e2d\u5fc3 \u00b5\uff0c\u4e5f\u5c31\u662f\u5c07\u6bcf\u4e00\u500b\u8cc7\u6599\u9ede\u7684\u5e73\u5747\u3002\u63a5\u8457\u5c07\u6bcf\u4e00\u500b\u8cc7\u6599\u9ede\u6e1b\u53bb \u00b5\uff0c\u4e5f\u5c31\u662f\u505a\u8cc7\u6599\u9ede\u7684\u5e73\u79fb\uff0c\u5e73\u79fb\u5f8c\u539f\u9ede\u662f\u6240\u6709\u9ede\u7684\u4e2d\u5fc3\u3002\u7b2c\u4e09\u6b65\u8a08\u7b97\u7279\u5fb5\u5354\u65b9\u5dee\u77e9\u9663\uff0c\u5176\u4e2d\u77e9\u9663\u5c0d\u89d2\u7dda\u4e0a\u5206\u5225\u662f\u6bcf\u500b\u7279\u5fb5\u7684\u65b9\u5dee\uff0c\u800c\u975e\u5c0d\u89d2\u7dda\u4e0a\u7684\u6578\u503c\u662f\u4e0d\u540c\u7279\u5fb5\u9593\u5f7c\u6b64\u7684\u5354\u65b9\u5dee\u3002\u5354\u65b9\u5dee\u662f\u8861\u91cf\u5169\u500b\u8b8a\u6578\u540c\u6642\u8b8a\u5316\u7684\u8b8a\u5316\u7a0b\u5ea6\uff0c\u5354\u65b9\u5dee\u7d55\u5c0d\u503c\u8d8a\u5927\u5169\u8005\u5c0d\u5f7c\u6b64\u7684\u5f71\u97ff\u8d8a\u5927\u3002\u7b2c\u56db\u6b65\u9a5f\u5c0d\u77e9\u9663\u9032\u884c\u7279\u5fb5\u503c\u5206\u89e3\uff0c\u8a08\u7b97\u5354\u65b9\u5dee\u77e9\u9663\u7684\u7279\u5fb5\u5411\u91cf\u548c\u7279\u5fb5\u503c\u4e26\u9078\u53d6\u7279\u5fb5\u5411\u91cf\u3002\u7b2c\u4e94\u6b65\u9a5f\u5c07\u7279\u5fb5\u503c\u7531\u5c0f\u5230\u5927\u6392\u5e8f\uff0c\u4e26\u9078\u53d6\u5176\u4e2d\u6700\u5927\u7684 k \u500b\u7279\u5fb5\u3002\u7136\u5f8c\u5c07\u9019\u4e9b k 
\u500b\u7279\u5fb5\u5411\u91cf\u4f5c\u70ba\u7279\u5fb5\u5411\u91cf\u77e9\u9663\u3002\u6700\u5f8c\u5c0d\u8cc7\u6599\u96c6\u4e2d\u7684\u6bcf\u4e00\u500b\u7279\u5fb5\u8f49\u63db\u70ba\u65b0\u7684\u7279\u5fb5\u3002 \u5148\u6c42\u51fa\u6240\u6709\u8cc7\u6599\u9ede\u4e2d\u5fc3 \u00b5 \u5c07\u6bcf\u4e00\u500b\u8cc7\u6599\u9ede\u6e1b\u53bb \u00b5 \u8a08\u7b97\u7279\u5fb5\u7684\u5354\u65b9\u5dee\u77e9\u9663 \u5c0d\u77e9\u9663\u9032\u884c\u7279\u5fb5\u503c\u5206\u89e3 \u53d6\u51fa\u6700\u5927\u7684 k \u500b\u7279\u5fb5\u503c\u5c0d\u61c9\u7684\u7279\u5fb5\u5411\u91cf \u5c07\u8cc7\u6599\u9ede\u6295\u5f71\u5230\u9078\u53d6\u7684\u7279\u5fb5\u5411\u91cf\u4e0a T-Distributed Stochastic Neighbor Embedding (t-SNE) t-SNE \u76ee\u6a19\u8ddf PCA \u662f\u4e00\u6a23\u7684\uff0c\u5b83\u5011\u90fd\u5e0c\u671b\u628a\u9ad8\u7dad\u7684\u8cc7\u6599\u6295\u5f71\u5230\u4f4e\u7dad\u4e2d\uff0c\u4e26\u4e14\u4fdd\u7559\u9ad8\u7dad\u4e2d\u7684\u9ede\u8207\u9ede\u4e4b\u9593\u7684\u95dc\u4fc2\u8207\u7279\u6027\u3002\u5169\u8005\u4e0d\u540c\u7684\u9ede\u5728\u65bc t-SNE \u5141\u8a31\u975e\u7dda\u6027\u7684\u8f49\u63db\u3002\u56e0\u70ba t-SNE \u4f7f\u7528\u4e86\u66f4\u8907\u96dc\u7684\u516c\u5f0f\u4f86\u8868\u9054\u9ad8\u7dad\u8207\u4f4e\u7dad\u4e4b\u9593\u7684\u95dc\u4fc2\u3002\u4e3b\u8981\u662f\u5c07\u9ad8\u7dad\u7684\u6578\u64da\u7528\u9ad8\u65af\u5206\u4f48\u7684\u6a5f\u7387\u5bc6\u5ea6\u51fd\u6578\u8fd1\u4f3c\uff0c\u800c\u4f4e\u7dad\u6578\u64da\u7684\u90e8\u5206\u4f7f\u7528 t \u5206\u4f48\u7684\u65b9\u5f0f\u4f86\u8fd1\u4f3c\u3002 PCA & t-SNE \u6574\u7406 PCA\u548ct-SNE\u662f\u5169\u500b\u4e0d\u540c\u964d\u7dad\u7684\u65b9\u6cd5\uff0cPCA\u7684\u512a\u9ede\u5728\u65bc\u7c21\u55ae\u82e5\u65b0\u7684\u9ede\u8981\u6620\u5c04\u6642\u76f4\u63a5\u4ee3\u5165\u516c\u5f0f\u5373\u53ef\u5f97\u51fa\u964d\u7dad\u5f8c\u7684\u9ede\u3002\u82e5t-SNE\u6709\u65b0\u7684\u9ede\u8fd1\u4f86\u6642\u6211\u5011\u6c92\u6709\u53bb\u8a08\u7b97\u65b0\u7684\u9ede\u548c\u820a\u7684\u9ede\u4e4b\u9593\u7684\u95dc\u4fc2\u56e0\u6b64 
\u6211\u5011\u7121\u6cd5\u5c07\u65b0\u7684\u9ede\u6295\u5f71\u4e0b\u53bb\u3002t-SNE\u7684\u512a\u9ede\u662f\u53ef\u4ee5\u4fdd\u7559\u539f\u672c\u9ad8\u7dad\u8ddd\u96e2\u8f03\u9060\u7684\u9ede\u964d\u7dad\u5f8c\u4f9d\u7136\u4fdd\u6301 \u9060\u7684\u8ddd\u96e2\uff0c\u56e0\u6b64\u9019\u4e9b\u7fa4\u964d\u7dad\u5f8c\u4f9d\u7136\u4fdd\u6301\u7fa4\u7684\u7279\u6027\u3002 PCA\u5141\u8a31\u7dda\u6027\u7684\u8f49\u63db t-SNE\u5141\u8a31\u975e\u7dda\u6027\u7684\u8f49\u63db [\u7a0b\u5f0f\u5be6\u4f5c] PCA from sklearn.decomposition import PCA pca = PCA ( n_components = 2 , iterated_power = 1 ) train_reduced = pca . fit_transform ( X_train ) print ( 'PCA\u65b9\u5dee\u6bd4: ' , pca . explained_variance_ratio_ ) print ( 'PCA\u65b9\u5dee\u503c:' , pca . explained_variance_ ) t-SNE from sklearn.manifold import TSNE tsneModel = TSNE ( n_components = 2 , random_state = 42 , n_iter = 1000 ) train_reduced = tsneModel . fit_transform ( X_train ) t-SNE \u4e0d\u9069\u7528\u65bc\u65b0\u8cc7\u6599\u3002PCA \u964d\u7dad\u53ef\u4ee5\u9069\u7528\u65b0\u8cc7\u6599\uff0c\u53ef\u547c\u53ebtransform() \u51fd\u5f0f\u5373\u53ef\u3002\u800c t-SNE \u5247\u4e0d\u884c\u3002\u56e0\u70ba\u6f14\u7b97\u6cd5\u7684\u95dc\u4fc2\u5728 scikit-learn \u5957\u4ef6\u4e2d\u7684 t-SNE \u6f14\u7b97\u6cd5\u4e26\u6c92\u6709transform() \u51fd\u5f0f\u53ef\u4ee5\u547c\u53eb\u3002 Reference \u6df1\u5165\u5b78\u7fd2\u4e3b\u6210\u5206\u5206\u6790\uff08PCA\uff09\u6f14\u7b97\u6cd5\u539f\u7406\u53ca\u5176Python\u5be6\u73fe \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 7] \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#day-7-","text":"","title":"[Day 7] \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#_1","text":"\u964d\u7dad\u89c0\u5ff5 \u4f55\u8b02\u964d\u7dad? 
\u964d\u7dad\u6709\u4ec0\u9ebc\u512a\u9ede? \u5e38\u898b\u5169\u7a2e\u964d\u7dad\u65b9\u6cd5 PCA & t-SNE \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#dimension-reduction","text":"\u4e00\u822c\u8cc7\u6599\u5e38\u898b\u7684\u8868\u793a\u65b9\u6cd5\u6709\u4e00\u7dad(\u6578\u7dda)\u3001\u4e8c\u7dad(XY\u5e73\u9762)\u548c\u4e09\u7dad(XYZ\u7acb\u9ad4)\u3002\u7576\u5927\u65bc\u4e09\u7dad\u7684\u8cc7\u6599\u5c31\u96e3\u4ee5\u8996\u89ba\u5316\u5448\u73fe\uff0c\u90a3\u9ebc\u6211\u5011\u8a72\u5982\u4f55\u8868\u793a\u9ad8\u7dad\u5ea6\u7684\u8cc7\u6599\u540c\u6642\u53c8\u4e0d\u80fd\u58d3\u7e2e\u539f\u672c\u8cc7\u6599\u9593\u5f7c\u6b64\u7684\u95dc\u9023\u6027\u5462\uff1f\u9019\u6642\u964d\u7dad\u5c31\u80fd\u5e6b\u52a9\u4f60\u4e86\uff01\u964d\u7dad\u9867\u540d\u601d\u7fa9\uff0c\u5c31\u662f\u539f\u672c\u7684\u8cc7\u6599\u8655\u65bc\u5728\u4e00\u500b\u6bd4\u8f03\u9ad8\u7684\u7dad\u5ea6\u4f5c\u6a19\u4e0a\uff0c\u6211\u5011\u5e0c\u671b\u627e\u5230\u4e00\u500b\u4f4e\u7dad\u5ea6\u7684\u4f5c\u6a19\u4f86\u63cf\u8ff0\u5b83\uff0c\u4f46\u53c8\u4e0d\u80fd\u5931\u53bb\u8cc7\u6599\u672c\u8eab\u7684\u7279\u8cea\u3002","title":"\u964d\u7dad (Dimension 
Reduction)"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#_2","text":"\u60f3\u60f3\u770b\u5982\u679c\u6211\u5011\u80fd\u5920\u628a\u4e00\u4e9b\u8cc7\u6599\u505a\u58d3\u7e2e\uff0c\u540c\u6642\u53c8\u80fd\u5920\u4fdd\u6301\u8cc7\u6599\u539f\u4f86\u7684\u7279\u6027\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u7528\u6bd4\u8f03\u5c11\u7684\u7a7a\u9593\uff0c\u6216\u662f\u8a08\u7b97\u6642\u7528\u6bd4\u8f03\u5c11\u7684\u8cc7\u6e90\u5c31\u53ef\u4ee5\u5f97\u5230\u8ddf\u6c92\u6709\u505a\u8cc7\u6599\u58d3\u7e2e\u4e4b\u524d\u5f97\u5230\u76f8\u4f3c\u7684\u7d50\u679c\u3002\u6b64\u5916\u8cc7\u6599\u964d\u7dad\u53ef\u4ee5\u5e6b\u52a9\u6211\u5011\u9032\u884c\u8cc7\u6599\u8996\u89ba\u5316\uff0c\u4e8c\u7dad\u53ef\u4ee5\u7528\u5e73\u9762\u5716\u8868\u793a\u3001\u4e09\u7dad\u53ef\u4ee5\u7528\u7acb\u9ad4\u5716\u4f5c\u8868\u793a\uff0c\u800c\u5927\u65bc\u4e09\u7dad\u7684\u7a7a\u9593\u96e3\u4ee5\u8996\u89ba\u5316\u505a\u5448\u73fe\u3002","title":"\u70ba\u4ec0\u9ebc\u8981\u964d\u7dad?"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#_3","text":"\u5e38\u898b\u7684\u964d\u7dad\u65b9\u6cd5\u6709\u5169\u7a2e\u5206\u5225\u6709\u7dda\u6027\u65b9\u6cd5\u7684\u4e3b\u6210\u5206\u5206\u6790(PCA)\u4ee5\u53ca\u975e\u7dda\u6027\u7684 t-\u96a8\u6a5f\u9130\u8fd1\u5d4c\u5165\u6cd5(t-SNE)\u3002\u4e0b\u5716\u4f8b\u5b50\u662f\u5c07 28*28 \u5927\u5c0f\u7684\u624b\u5beb\u6578\u5b57\u7167\u7247\uff0c\u5206\u5225\u900f\u904e\u4e0a\u8ff0\u5169\u7a2e\u964d\u7dad\u65b9\u6cd5\u5c07\u4e00\u5f35 784 \u500b\u50cf\u7d20\u7684\u5f71\u50cf\u964d\u6210 2 \u7dad\u4e26\u6295\u5c04\u5728\u5e73\u9762\u5ea7\u6a19\u4e0a\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe PCA \u964d\u70ba\u5f8c\u53ef\u4ee5\u5927\u81f4\u5c07 0~9 \u7684\u624b\u5beb\u6578\u5b57\u7167\u7247\u5728\u5e73\u9762\u4e0a\u5206\u6210\u5341\u7fa4\uff0c\u4e0d\u904e\u5f7c\u6b64\u9593\u7684\u754c\u7dda\u9084\u662f\u5f88\u6a21\u7cca\u3002\u800c\u6211\u5011\u900f\u904e t-SNE 
\u65b9\u6cd5\u964d\u70ba\u5f8c\u53ef\u4ee5\u770b\u5230\u5e73\u9762\u4e0a\u5f88\u6e05\u695a\u7684\u5c07\u9019\u5341\u500b\u6578\u5b57\u5206\u6210\u5341\u7fa4\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u5f97\u77e5\u624b\u5beb\u6578\u5b57\u7684\u5f71\u50cf\u5728\u975e\u7dda\u6027\u7684\u964d\u7dad\u8f49\u63db\u6548\u679c\u662f\u6bd4\u8f03\u597d\u7684\u3002 Principal component analysis (PCA) T-Distributed Stochastic Neighbor Embedding (t-SNE) \u56e0\u70ba t-SNE \u5141\u8a31\u975e\u7dda\u6027\u7684\u8f49\u63db\uff0c\u6b64\u5916 t-SNE \u4f7f\u7528\u4e86\u66f4\u8907\u96dc\u7684\u516c\u5f0f\u4f86\u8868\u9054\u9ad8\u7dad\u8207\u4f4e\u7dad\u4e4b\u9593\u7684\u95dc\u4fc2\u3002\u56e0\u6b64\u5728\u9019\u7a2e 0~9 \u6709\u5341\u500b\u5206\u985e\u7684\u60c5\u6cc1\u4e0b\u53ef\u4ee5\u78ba\u4fdd\u5f7c\u6b64\u9593\u7684\u8ddd\u96e2\u6703\u88ab\u5340\u9694\u8a72\u800c\u4e0d\u6703\u91cd\u758a\u3002","title":"\u964d\u7dad\u6f14\u7b97\u6cd5"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#principal-component-analysis-pca","text":"\u4e3b\u6210\u4efd\u5206\u6790(Principal component analysis, PCA)\u3002\u5176\u4e3b\u8981\u76ee\u7684\u662f\u628a\u9ad8\u7dad\u7684\u9ede\u982d\u5f71\u5230\u4f4e\u7dad\u7684\u7a7a\u9593\u4e0a\uff0c\u4e26\u4e14\u4f4e\u7dad\u5ea6\u7684\u7a7a\u9593\u4fdd\u6709\u9ad8\u7dad\u7a7a\u9593\u4e2d\u5927\u90e8\u5206\u7684\u6027\u8cea\u3002\u900f\u904e\u5c07\u4e00\u500b\u5177\u6709 n \u500b\u7279\u5fb5\u7a7a\u9593\u7684\u6a23\u672c\uff0c\u8f49\u63db\u70ba\u5177\u6709 k \u500b\u7279\u5fb5\u7a7a\u9593\u7684\u6a23\u672c\uff0c\u5176\u4e2d k \u5fc5\u5b9a\u8981\u5c0f\u65bc n\u3002\u6b64\u5916 PCA \u53ea\u5141\u8a31\u7dda\u6027\u7684\u8f49\u63db\u3002\u5982\u4e0b\u5716\u6240\u793a\uff0c\u6211\u5011\u5c07\u6350\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u9032\u884c PCA 
\u964d\u7dad\u3002\u5c07\u539f\u6709\u56db\u500b\u7279\u5fb5\u5206\u5225\u6709\u82b1\u74e3\u8207\u82b1\u843c\u7684\u9577\u8207\u5bec\uff0c\u900f\u904e\u7dda\u6027\u8f49\u63db\u6210\u5169\u7dad\u4e26\u6295\u5c04\u5728\u5e73\u9762\u4e0a\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u4e09\u7a2e\u82b1\u7684\u985e\u5225\u5728\u5e73\u9762\u4e0a\u5404\u81ea\u90fd\u6709\u7dda\u6027\u7684\u8da8\u52e2\uff0c\u4e5f\u5c31\u662f\u5716\u4e2d\u7d05\u8272\u7684\u7dda\u689d\u3002","title":"Principal component analysis (PCA)"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#pca","text":"\u9996\u5148\u4e00\u958b\u59cb\u5148\u6c42\u51fa\u6240\u6709\u8cc7\u6599\u9ede\u4e2d\u5fc3 \u00b5\uff0c\u4e5f\u5c31\u662f\u5c07\u6bcf\u4e00\u500b\u8cc7\u6599\u9ede\u7684\u5e73\u5747\u3002\u63a5\u8457\u5c07\u6bcf\u4e00\u500b\u8cc7\u6599\u9ede\u6e1b\u53bb \u00b5\uff0c\u4e5f\u5c31\u662f\u505a\u8cc7\u6599\u9ede\u7684\u5e73\u79fb\uff0c\u5e73\u79fb\u5f8c\u539f\u9ede\u662f\u6240\u6709\u9ede\u7684\u4e2d\u5fc3\u3002\u7b2c\u4e09\u6b65\u8a08\u7b97\u7279\u5fb5\u5354\u65b9\u5dee\u77e9\u9663\uff0c\u5176\u4e2d\u77e9\u9663\u5c0d\u89d2\u7dda\u4e0a\u5206\u5225\u662f\u6bcf\u500b\u7279\u5fb5\u7684\u65b9\u5dee\uff0c\u800c\u975e\u5c0d\u89d2\u7dda\u4e0a\u7684\u6578\u503c\u662f\u4e0d\u540c\u7279\u5fb5\u9593\u5f7c\u6b64\u7684\u5354\u65b9\u5dee\u3002\u5354\u65b9\u5dee\u662f\u8861\u91cf\u5169\u500b\u8b8a\u6578\u540c\u6642\u8b8a\u5316\u7684\u8b8a\u5316\u7a0b\u5ea6\uff0c\u5354\u65b9\u5dee\u7d55\u5c0d\u503c\u8d8a\u5927\u5169\u8005\u5c0d\u5f7c\u6b64\u7684\u5f71\u97ff\u8d8a\u5927\u3002\u7b2c\u56db\u6b65\u9a5f\u5c0d\u77e9\u9663\u9032\u884c\u7279\u5fb5\u503c\u5206\u89e3\uff0c\u8a08\u7b97\u5354\u65b9\u5dee\u77e9\u9663\u7684\u7279\u5fb5\u5411\u91cf\u548c\u7279\u5fb5\u503c\u4e26\u9078\u53d6\u7279\u5fb5\u5411\u91cf\u3002\u7b2c\u4e94\u6b65\u9a5f\u5c07\u7279\u5fb5\u503c\u7531\u5c0f\u5230\u5927\u6392\u5e8f\uff0c\u4e26\u9078\u53d6\u5176\u4e2d\u6700\u5927\u7684 k \u500b\u7279\u5fb5\u3002\u7136\u5f8c\u5c07\u9019\u4e9b k 
\u500b\u7279\u5fb5\u5411\u91cf\u4f5c\u70ba\u7279\u5fb5\u5411\u91cf\u77e9\u9663\u3002\u6700\u5f8c\u5c0d\u8cc7\u6599\u96c6\u4e2d\u7684\u6bcf\u4e00\u500b\u7279\u5fb5\u8f49\u63db\u70ba\u65b0\u7684\u7279\u5fb5\u3002 \u5148\u6c42\u51fa\u6240\u6709\u8cc7\u6599\u9ede\u4e2d\u5fc3 \u00b5 \u5c07\u6bcf\u4e00\u500b\u8cc7\u6599\u9ede\u6e1b\u53bb \u00b5 \u8a08\u7b97\u7279\u5fb5\u7684\u5354\u65b9\u5dee\u77e9\u9663 \u5c0d\u77e9\u9663\u9032\u884c\u7279\u5fb5\u503c\u5206\u89e3 \u53d6\u51fa\u6700\u5927\u7684 k \u500b\u7279\u5fb5\u503c\u5c0d\u61c9\u7684\u7279\u5fb5\u5411\u91cf \u5c07\u8cc7\u6599\u9ede\u6295\u5f71\u5230\u9078\u53d6\u7684\u7279\u5fb5\u5411\u91cf\u4e0a","title":"PCA\u7684\u4e3b\u8981\u6b65\u9a5f"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#t-distributed-stochastic-neighbor-embedding-t-sne","text":"t-SNE \u76ee\u6a19\u8ddf PCA \u662f\u4e00\u6a23\u7684\uff0c\u5b83\u5011\u90fd\u5e0c\u671b\u628a\u9ad8\u7dad\u7684\u8cc7\u6599\u6295\u5f71\u5230\u4f4e\u7dad\u4e2d\uff0c\u4e26\u4e14\u4fdd\u7559\u9ad8\u7dad\u4e2d\u7684\u9ede\u8207\u9ede\u4e4b\u9593\u7684\u95dc\u4fc2\u8207\u7279\u6027\u3002\u5169\u8005\u4e0d\u540c\u7684\u9ede\u5728\u65bc t-SNE \u5141\u8a31\u975e\u7dda\u6027\u7684\u8f49\u63db\u3002\u56e0\u70ba t-SNE \u4f7f\u7528\u4e86\u66f4\u8907\u96dc\u7684\u516c\u5f0f\u4f86\u8868\u9054\u9ad8\u7dad\u8207\u4f4e\u7dad\u4e4b\u9593\u7684\u95dc\u4fc2\u3002\u4e3b\u8981\u662f\u5c07\u9ad8\u7dad\u7684\u6578\u64da\u7528\u9ad8\u65af\u5206\u4f48\u7684\u6a5f\u7387\u5bc6\u5ea6\u51fd\u6578\u8fd1\u4f3c\uff0c\u800c\u4f4e\u7dad\u6578\u64da\u7684\u90e8\u5206\u4f7f\u7528 t \u5206\u4f48\u7684\u65b9\u5f0f\u4f86\u8fd1\u4f3c\u3002","title":"T-Distributed Stochastic Neighbor Embedding 
(t-SNE)"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#pca-t-sne","text":"PCA\u548ct-SNE\u662f\u5169\u500b\u4e0d\u540c\u964d\u7dad\u7684\u65b9\u6cd5\uff0cPCA\u7684\u512a\u9ede\u5728\u65bc\u7c21\u55ae\u82e5\u65b0\u7684\u9ede\u8981\u6620\u5c04\u6642\u76f4\u63a5\u4ee3\u5165\u516c\u5f0f\u5373\u53ef\u5f97\u51fa\u964d\u7dad\u5f8c\u7684\u9ede\u3002\u82e5t-SNE\u6709\u65b0\u7684\u9ede\u8fd1\u4f86\u6642\u6211\u5011\u6c92\u6709\u53bb\u8a08\u7b97\u65b0\u7684\u9ede\u548c\u820a\u7684\u9ede\u4e4b\u9593\u7684\u95dc\u4fc2\u56e0\u6b64 \u6211\u5011\u7121\u6cd5\u5c07\u65b0\u7684\u9ede\u6295\u5f71\u4e0b\u53bb\u3002t-SNE\u7684\u512a\u9ede\u662f\u53ef\u4ee5\u4fdd\u7559\u539f\u672c\u9ad8\u7dad\u8ddd\u96e2\u8f03\u9060\u7684\u9ede\u964d\u7dad\u5f8c\u4f9d\u7136\u4fdd\u6301 \u9060\u7684\u8ddd\u96e2\uff0c\u56e0\u6b64\u9019\u4e9b\u7fa4\u964d\u7dad\u5f8c\u4f9d\u7136\u4fdd\u6301\u7fa4\u7684\u7279\u6027\u3002 PCA\u5141\u8a31\u7dda\u6027\u7684\u8f49\u63db t-SNE\u5141\u8a31\u975e\u7dda\u6027\u7684\u8f49\u63db","title":"PCA & t-SNE \u6574\u7406"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#_4","text":"","title":"[\u7a0b\u5f0f\u5be6\u4f5c]"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#pca_1","text":"from sklearn.decomposition import PCA pca = PCA ( n_components = 2 , iterated_power = 1 ) train_reduced = pca . fit_transform ( X_train ) print ( 'PCA\u65b9\u5dee\u6bd4: ' , pca . explained_variance_ratio_ ) print ( 'PCA\u65b9\u5dee\u503c:' , pca . explained_variance_ )","title":"PCA"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#t-sne","text":"from sklearn.manifold import TSNE tsneModel = TSNE ( n_components = 2 , random_state = 42 , n_iter = 1000 ) train_reduced = tsneModel . 
fit_transform ( X_train ) t-SNE \u4e0d\u9069\u7528\u65bc\u65b0\u8cc7\u6599\u3002PCA \u964d\u7dad\u53ef\u4ee5\u9069\u7528\u65b0\u8cc7\u6599\uff0c\u53ef\u547c\u53ebtransform() \u51fd\u5f0f\u5373\u53ef\u3002\u800c t-SNE \u5247\u4e0d\u884c\u3002\u56e0\u70ba\u6f14\u7b97\u6cd5\u7684\u95dc\u4fc2\u5728 scikit-learn \u5957\u4ef6\u4e2d\u7684 t-SNE \u6f14\u7b97\u6cd5\u4e26\u6c92\u6709transform() \u51fd\u5f0f\u53ef\u4ee5\u547c\u53eb\u3002","title":"t-SNE"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#reference","text":"\u6df1\u5165\u5b78\u7fd2\u4e3b\u6210\u5206\u5206\u6790\uff08PCA\uff09\u6f14\u7b97\u6cd5\u539f\u7406\u53ca\u5176Python\u5be6\u73fe \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/","text":"[Day 8] \u7dda\u6027\u8ff4\u6b78 (Linear Regression) ## \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 - \u8a8d\u8b58\u7dda\u6027\u8ff4\u6b78 - \u900f\u904e\u6a5f\u5668\u5b78\u7fd2\u4f86\u627e\u51fa\u4e00\u689d\u51fd\u5f0f\uff0c\u4f86\u6700\u4f73\u5316\u6a21\u578b - \u5169\u7a2e\u6c42\u89e3\u65b9\u6cd5 - \u7dda\u6027\u8ff4\u6b78\u7a0b\u5f0f\u624b\u628a\u624b - \u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u3001\u591a\u5143\u8ff4\u6b78\u3001\u975e\u7dda\u6027\u8ff4\u6b78 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u8a8d\u8b58\u7dda\u6027\u8ff4\u6b78 \u7dda\u6027\u8ff4\u6b78\u662f\u7d71\u8a08\u4e0a\u5728\u627e\u591a\u500b\u81ea\u8b8a\u6578\u548c\u4f9d\u8b8a\u6578\u4e4b\u9593\u7684\u95dc\u4fc2\u6240\u5efa\u51fa\u4f86\u7684\u6a21\u578b\u3002\u53ea\u6709\u4e00\u500b\u81ea\u8b8a\u6578(x)\u548c\u4e00\u500b\u4f9d\u8b8a\u6578(y)\u7684\u60c5\u5f62\u7a31\u70ba\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u5927\u65bc\u4e00\u500b\u81ea\u8b8a\u6578(x 1 ,x 2 ,...)\u7684\u60c5\u5f62\u7a31\u70ba\u591a\u5143\u8ff4\u6b78\u3002 \u4e00\u500b\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78: y=ax+b\uff0c\u5176\u4e2d b\uff1a\u622a\u8ddd(Intercept)\uff0ca\uff1a\u659c\u7387(Slope) \u70ba 
x \u8b8a\u52d5\u4e00\u500b\u55ae\u4f4d y \u8b8a\u52d5\u7684\u91cf\uff0c\u5982\u4e0b\u5716: \u8ff4\u6b78\u5206\u6790\u7684\u76ee\u6a19\u51fd\u6578\u6216\u7a31\u640d\u5931\u51fd\u6578(loss function)\u5c31\u662f\u5e0c\u671b\u627e\u5230\u7684\u6a21\u578b\u6700\u7d42\u7684\u6b98\u5dee\u8d8a\u5c0f\u8d8a\u597d\uff0c\u4f86\u627e\u53c3\u6578 a \u548c b\u3002 \u5169\u7a2e\u6c42\u89e3\u65b9\u6cd5 \u7dda\u6027\u6a21\u578b\u6700\u5e38\u898b\u7684\u89e3\u6cd5\u6709\u5169\u7a2e\uff0c\u5206\u5225\u70ba Closed-form (\u9589\u5f0f\u89e3) \u8207\u68af\u5ea6\u4e0b\u964d (Gradient descent)\u3002\u7576\u7279\u5fb5\u5c11\u6642\u4f7f\u7528 Closed-form \u8f03\u70ba\u9069\u5408\uff0c\u4f7f\u7528\u4e0b\u9762\u516c\u5f0f\u4f86\u6c42\u51fa \u03b8 \u503c\u3002\u6211\u5011\u53c8\u53ef\u4ee5\u8aaa\u7dda\u6027\u6a21\u578b\u7684\u6700\u5c0f\u5e73\u65b9\u6cd5\u7684\u89e3\u5373\u70ba Closed-form\u3002\u82e5\u7576\u662f\u8907\u96dc\u7684\u554f\u984c\u6642 Gradient descen \u8f03\u80fd\u89e3\u6c7a\uff0c\u5176\u539f\u56e0\u662f\u5927\u90e8\u5206\u7684\u554f\u984c\u5176\u5be6\u662f\u6c92\u6709\u516c\u5f0f\u89e3\u7684\u3002\u6211\u5011\u53ea\u80fd\u6c42\u51fa\u4e00\u500b\u51fd\u6578 f(x) \u4f7f\u5176\u8aa4\u5dee\u6700\u5c0f\u8d8a\u597d\u3002 Closed-form Gradient descent ## Least Square Method (\u6700\u5c0f\u5e73\u65b9\u6cd5) \u5047\u8a2d\u4e00\u500b\u5730\u5340\u7684\u623f\u50f9\u8207\u576a\u6578\u662f\u5448\u7dda\u6027\u95dc\u4fc2\uff0c\u4e26\u4ee5\u4e0b\u5716\u4e2d\u7684\u4e09\u500b\u9ede\u8868\u793a\u3002\u5982\u679c\u6211\u5011\u60f3\u900f\u904e\u623f\u5b50\u7684\u576a\u6578\u4f86\u9810\u6e2c\u623f\u50f9\uff0c\u90a3\u9ebc\u6211\u5011\u7684\u76ee\u6a19\u5c31\u662f\u627e\u5230\u4e00\u689d\u76f4\u7dda\uff0c\u4e26\u8207\u5ea7\u6a19\u5e73\u9762\u4e0a\u9019\u4e09\u500b\u9ede\u7684\u5dee\u8ddd\u8d8a\u5c0f\u8d8a\u597d\u3002\u90a3\u9019\u689d\u76f4\u7dda\u8a72\u600e\u9ebc\u627e\u5462\uff1f\u9996\u5148\u6211\u5011\u96a8\u6a5f\u627e\u4e00\u689d\u76f4\u7dda\uff0c\u4e26\u8a08\u7b97\u9019\u4e09\u9ede\u7684 
loss\u3002\u640d\u5931\u51fd\u6578\u53ef\u4ee5\u81ea\u5df1\u5b9a\u7fa9\uff0c\u5047\u8a2d\u6211\u5011\u4f7f\u7528 MSE \u5747\u65b9\u8aa4\u5dee\u4f86\u8a08\u7b97\u3002\u900f\u904e\u4e00\u7cfb\u5217\u8a08\u7b97\u6211\u5011\u5f97\u5230\u4e00\u500b loss \u5373\u70ba MSE \u503c\u3002\u63a5\u8457\u6211\u5011\u5c07\u9019\u500b\u76f4\u7dda\u7a0d\u7a0d\u7684\u8f49\u4e00\u500b\u89d2\u5ea6\u5f8c\u53c8\u53ef\u4ee5\u8a08\u7b97\u4e00\u500b\u65b0\u7684 MSE\uff0c\u6b64\u523b\u6211\u5011\u53ef\u4ee5\u767c\u73fe MSE \u503c\u53c8\u6bd4\u525b\u525b\u66f4\u5c0f\u4e86\u3002\u4e5f\u5c31\u662f\u8aaa\u9019\u4e00\u689d\u65b0\u7684\u76f4\u7dda\u80fd\u5920\u66f4\u6cd5\u61c9\u51fa\u8a13\u7df4\u96c6\u4e2d A\u3001B\u3001C \u7684\u6578\u64da\u9ede\u6240\u53cd\u6620\u7684\u623f\u5c4b\u576a\u6578\u8207\u623f\u50f9\u4e4b\u9593\u7684\u7dda\u6027\u95dc\u4fc2\u3002 \u7c21\u55ae\u4f86\u8aaa\u6211\u5011\u5728\u4e00\u500b\u4e8c\u7dad\u7a7a\u9593\u4e2d\uff0c\u6211\u5011\u53ef\u4ee5\u627e\u5230\u7121\u6578\u689d\u76f4\u7dda\u3002\u73fe\u5728\u6211\u5011\u80fd\u505a\u7684\u4e8b\u60c5\u5c31\u662f\u5f9e\u9019\u7121\u6578\u689d\u76f4\u7dda\u4e2d\u9078\u51fa\u4e00\u689d\u6700\u4f73\u7684\u7576\u4f5c\u6211\u5011\u7684\u9810\u6e2c\u6a21\u578b\uff0c\u540c\u6642\u5b83\u9762\u5c0d\u9019\u4e09\u9ede\u7684\u8aa4\u5dee\u662f\u8981\u6700\u5c0f\u7684\u3002\u56e0\u6b64\u6211\u5011\u7684\u76ee\u6a19\u5c31\u662f\u8981\u6700\u5c0f\u5316 MSE \u4e5f\u5c31\u662f\u6240\u8b02\u7684\u640d\u5931\u51fd\u6578 (loss function)\u3002\u6240\u4ee5\u6574\u500b\u7dda\u6027\u8ff4\u6b78\u7684\u76ee\u6a19\u5c31\u662f\u6700\u5c0f\u5316\u6211\u5011\u7684\u640d\u5931\u51fd\u6578\uff0c\u5176\u4e2d\u4e00\u500b\u89e3\u6cd5\u5c31\u662f\u6700\u5c0f\u5e73\u65b9\u6cd5\u3002\u56e0\u70ba MSE \u7b49\u65bc 1/n \u500d\u7684\u6b8b\u5dee\u5e73\u65b9\u548c (RSS)\uff0c\u5176\u4e2d\u5206\u6bcd n 
\u70ba\u5e38\u6578\uff0c\u4e0d\u5f71\u97ff\u6975\u5c0f\u5316\u6545\u62ff\u6389\u3002\u56e0\u6b64\u6700\u7d42\u7684\u6c42\u89e3\u662f\u6eff\u8db3\u6700\u5c0f\u5316\u5e73\u65b9\u548c\uff0c\u4f7f\u5176\u6700\u5c0f\u5316\u3002\u7d93\u904e\u6578\u5b78\u63a8\u5c0e\u5f8c\uff0c\u7c21\u5316\u7684\u516c\u5f0f\u5982\u4e0b\uff1a \u5c0f\u8a66\u8eab\u624b \u57fa\u65bc\u4e0a\u9762\u7684\u516c\u5f0f\u6211\u5011\u60f3\u627e\u51fa\u4e00\u7d44\u53c3\u6578\u6b0a\u91cd \u03b8\u3002\u4e5f\u5c31\u662f\u4e0b\u5716\u554f\u984c\u4e2d\u7684 a (\u03b8 0 )\u3001b (\u03b8 1 ) \u5169\u53c3\u6578\uff0c\u4f7f\u5f97\u5e73\u9762\u4e0a\u9019\u4e09\u9ede\u5e73\u65b9\u548c\u6709\u6975\u5c0f\u503c\u3002\u9019\u500b\u51fd\u5f0f\u5c0d \u03b8 0 , \u03b8 1 \u504f\u505a\u5fae\u5206\u8a2d\u4ed6\u5011\u70ba0\uff0c\u63a5\u8457\u6211\u5011\u5c0d\u65b9\u7a0b\u5f0f\u6c42\u89e3\u3002 \u6b64\u51fd\u5f0f\u53ea\u6709\u6975\u5c0f\u503c\uff0c\u56e0\u6b64\u6211\u5011\u5f97\u5230\u7684 \u03b8 0 , \u03b8 1 \u6700\u5c0f\u6975\u503c\u7684\u89e3\u3002 \u7bc4\u4f8b\u7a0b\u5f0f (\u623f\u50f9\u9810\u6e2c) \u624b\u523b\u7dda\u6027\u8ff4\u6b78 \u6211\u5011\u900f\u904e Sklearn \u6240\u63d0\u4f9b\u7684\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6\u9032\u884c\u7dda\u6027\u8ff4\u6b78\u6a21\u578b\u5efa\u6a21\uff0c\u4e26\u63a1\u7528\u6700\u5c0f\u5e73\u6cd5\u3002\u9996\u5148\u70ba\u4e86\u8981\u9a57\u8b49\u6211\u5011\u4e0a\u9762\u7684\u516c\u5f0f\uff0c\u56e0\u6b64\u6211\u5011\u5148\u5229\u7528 Numpy \u5957\u4ef6\u81ea\u5df1\u624b\u523b\u505a\u4e00\u7cfb\u5217\u7684\u77e9\u9663\u904b\u7b97\u6c42\u51fa\u6bcf\u4e00\u9805\u7684\u4fc2\u6578\u8207\u622a\u8ddd\u3002 import numpy as np import pandas as pd from sklearn.metrics import mean_squared_error from sklearn.datasets import load_boston # \u8f09\u5165 Sklearn \u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6 13\u500b\u8f38\u5165\u7279\u5fb5 1\u500b\u8f38\u51fa\u7279\u5fb5 boston_dataset = load_boston () # \u8f38\u5165\u7279\u5fb5\u517113\u500b X = boston_dataset . 
data # \u8a2d\u5b9a\u622a\u8ddd\u9805 b \u6b0a\u91cd\u503c\u70ba 1 b = np . ones (( X . shape [ 0 ], 1 )) # \u6dfb\u52a0\u5e38\u6578\u9805\u7279\u5fb5\uff0c\u6700\u7d42\u6709 13+1 \u500b\u8f38\u5165\u7279\u5fb5 X = np . hstack (( X , b )) # \u8f38\u51fa(\u623f\u50f9) y = boston_dataset . target # \u8a08\u7b97 Beta (@ \u70ba numpy \u4e2d 2-D arrays \u7684\u77e9\u9663\u4e58\u6cd5) Beta = np . linalg . inv ( X . T @ X ) @ X . T @ y y_pred = X @ Beta # MSE: 21.8948311817292 print ( 'MSE:' , mean_squared_error ( y_pred , y )) \u8a08\u7b97\u51fa\u4f86 Beta \u5f8c\u6211\u5011\u518d\u628a\u6240\u6709\u7684 X \u5e36\u5165\u4e26\u505a\u8a08\u7b97\uff0c\u7b97\u51fa\u4f86\u7684\u7d50\u679c MSE \u70ba 21.89\u3002\u6700\u5f8c\u6211\u5011\u53ef\u4ee5\u8a66\u8457\u628a Beta \u8b8a\u6578\u5217\u5370\u51fa\u4f86\u3002\u7e3d\u5171\u6703\u6709 14 \u500b\u53c3\u6578\uff0c\u7531 13 \u500b\u8f38\u5165\u7279\u5fb5\u4fc2\u6578\u8207\u6700\u5f8c\u4e00\u9805\u622a\u8ddd\u6240\u7d44\u6210\u7684\u3002 \u8f38\u51fa\u7d50\u679c\uff1a array([-1.08011358e-01, 4.64204584e-02, 2.05586264e-02, 2.68673382e+00, -1.77666112e+01, 3.80986521e+00, 6.92224640e-04, -1.47556685e+00, 3.06049479e-01, -1.23345939e-02, -9.52747232e-01, 9.31168327e-03, -5.24758378e-01, 3.64594884e+01]) \u4f7f\u7528 Sklearn LinearRegression \u7dda\u6027\u8ff4\u6b78\u7c21\u55ae\u4f86\u8aaa\uff0c\u5c31\u662f\u5c07\u8907\u96dc\u7684\u8cc7\u6599\u6578\u64da\uff0c\u64ec\u548c\u81f3\u4e00\u689d\u76f4\u7dda\u4e0a\uff0c\u5c31\u80fd\u65b9\u4fbf\u9810\u6e2c\u672a\u4f86\u7684\u8cc7\u6599\u3002\u63a5\u4e0b\u4f86\u6211\u5011\u4e00\u6a23\u4f7f\u7528\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6\uff0c\u4e26\u4f7f\u7528 Sklearn \u63d0\u4f9b\u7684 LinearRegression \u4f86\u6c42\u89e3\u3002 Parameters: - fit_intercept: \u662f\u5426\u6709\u622a\u8ddd\uff0c\u5982\u679c\u6c92\u6709\u5247\u76f4\u7dda\u904e\u539f\u9ede\u3002 Attributes: - coef_: \u53d6\u5f97\u4fc2\u6578\u3002 - intercept_: \u53d6\u5f97\u622a\u8ddd\u3002 Methods: - fit: 
\u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: R2 score \u6a21\u578b\u8a55\u4f30\u3002 import numpy as np import pandas as pd from sklearn.linear_model import LinearRegression from sklearn.metrics import mean_squared_error from sklearn.datasets import load_boston # \u8f09\u5165 Sklearn \u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6 13\u500b\u8f38\u5165\u7279\u5fb5 1\u500b\u8f38\u51fa\u7279\u5fb5 boston_dataset = load_boston () # \u8f38\u5165\u7279\u5fb5\u517113\u500b X = boston_dataset . data # \u8f38\u51fa(\u623f\u50f9) y = boston_dataset . target # \u8a13\u7df4\u6a21\u578b linearModel = LinearRegression () linearModel . fit ( X , y ) y_pred = linearModel . predict ( X ) # 21.894831181729202 print ( 'MSE:' , mean_squared_error ( y_pred , y )) Sklearn \u7684 LinearRegression \u6a21\u578b\u4e5f\u662f\u63a1\u7528\u5c0f\u5e73\u65b9\u6cd5\u6c42\u89e3\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u5176 MSE \u8207\u7a0d\u65e9\u624b\u523b\u7684\u65b9\u6cd5\u76f8\u7576\u5f88\u63a5\u8fd1\u3002\u53e6\u5916 Sklearn \u6a21\u578b\u540c\u6642\u4e5f\u63d0\u4f9b\u4e86 coef_ \u548c intercept_ \u5169\u500b\u5c6c\u6027\u53ef\u4ee5\u53d6\u5f97\u6a21\u578b\u7684\u7279\u5fb5\u4fc2\u6578\u8207\u622a\u8ddd\u3002 \u591a\u9805\u5f0f\u7684\u8ff4\u6b78\u6a21\u578b \u5c0d\u65bc\u7dda\u6027\u8ff4\u6b78\u4f86\u8aaa\uff0c\u8cc7\u6599\u90fd\u662f\u5f88\u5747\u52fb\u5730\u5206\u5e03\u5728\u4e00\u689d\u76f4\u7dda\u4e0a\uff0c\u4f46\u73fe\u5be6\u7684\u8cc7\u6599\u5f80\u5f80\u662f\u975e\u7dda\u6027\u7684\u5206\u4f48\u3002\u5982\u679c\u6211\u5011\u4e00\u6a23\u4f7f\u7528\u4e0a\u8ff0\u65b9\u6cd5\u53d6\u5f97\u7dda\u6027\u6a21\u578b\uff0c\u5728\u5be6\u969b\u5834\u57df\u4e0a\u9810\u6e2c\u6548\u679c\u53ef\u80fd\u4e26\u4e0d\u5927\u3002 
\u591a\u9805\u5f0f\u8ff4\u6b78\u4e2d\uff0c\u6578\u64da\u4e0d\u592a\u5177\u6709\u7dda\u6027\u95dc\u4fc2\uff0c\u56e0\u6b64\u61c9\u5c0b\u627e\u4e00\u4e9b\u975e\u7dda\u6027\u66f2\u7dda\u53bb\u64ec\u5408\u3002\u5c0d\u65bc\u4ee5\u4e0a\u7684\u6578\u64da\uff0c\u539f\u672c\u662f\u53ea\u6709\u4e00\u500b x \u7279\u5fb5\uff0c\u4f46\u662f\u6211\u5011\u53ef\u4ee5\u5efa\u69cb\u8a31\u591a\u65b0\u7684\u7279\u5fb5\u3002\u5982\u4e0b\u5716\uff0c\u7528\u4e00\u689d\u4e09\u6b21\u66f2\u7dda\u53bb\u64ec\u5408\u6578\u64da\u6548\u679c\u66f4\u597d\u3002\u6211\u5011\u5c07\u4e09\u6b21\u51fd\u6578\u770b\u6210 ax 3 +bx 2 +cx+d\u3002\u9019\u6a23\u5c31\u53c8\u8b8a\u6210\u89e3\u591a\u5143\uff0c\u5176\u6211\u5011\u5c31\u662f\u8981\u627e\u51fa a\u3001b\u3001c\u3001d \u4f7f\u5176\u640d\u5931\u51fd\u6578\u6700\u5c0f\u3002 \u7dda\u6027\u6a21\u578b\u7684\u64f4\u5c55 \u5f9e\u4e0a\u8ff0\u554f\u984c\u4e2d\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7dda\u6027\u8ff4\u6b78\u5728\u5be6\u52d9\u4e0a\u6240\u9762\u81e8\u7684\u554f\u984c\u3002\u9996\u5148\u6211\u5011\u4f86\u8ff4\u9867\u4e00\u4e0b\u7a0d\u65e9\u6240\u63d0\u5230\u7684\u7dda\u6027\u65b9\u7a0b\u5f0f\uff0c\u9019\u7d44\u7dda\u6027\u65b9\u7a0b\u5f0f\u8aaa\u660e\u4e86\u6bcf\u500b\u7279\u5fb5 x \u4e00\u6b21\u65b9\u8207\u76ee\u6a19\u503c\u662f\u6709\u4e00\u500b\u7dda\u6027\u7684\u95dc\u4fc2\u3002 y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 2 + ... 
+ \u03b2 n x n \u63a5\u8457\u6211\u5011\u518d\u4f86\u770b\u4e00\u4e0b\u53e6\u4e00\u500b\u4f8b\u5b50\uff0c\u6bd4\u5982\u8aaa\u7279\u5fb5 x 1 \u8207\u76ee\u6a19\u503c\u5b58\u5728\u8457\u4ee5\u4e0b\u7684\u95dc\u4fc2\u3002\u6211\u5011\u767c\u73fe\u9019\u7d44\u65b9\u7a0b\u5f0f\u5df2\u7d93\u4e0d\u662f\u4e00\u500b\u7dda\u6027\u95dc\u4fc2\u4e86\uff0c\u56e0\u70ba\u4ed6\u6709\u4e86 x 1 \u7684\u4e8c\u6b21\u65b9\u3002 y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 1 2 \u90a3\u9ebc\u8a72\u600e\u9ebc\u505a\u6211\u5011\u624d\u80fd\u53c8\u628a\u5b83\u8f49\u63db\u6210\u7dda\u6027\u95dc\u4fc2\u5462\uff1f\u9019\u6642\u5019\u6211\u5011\u5c31\u53ef\u4ee5\u7528\u4e00\u500b\u65b0\u7684\u7279\u5fb5 x 2 \u3002\u6211\u5011\u8b93 x 2 \u7b49\u65bc x 1 \u7684\u5e73\u65b9\uff0c\u9019\u6a23\u6211\u5011\u518d\u628a x 2 \u5e36\u8ff4\u539f\u65b9\u7a0b\u5f0f\u4e2d\u3002\u6b64\u6642\u9019\u5169\u500b\u7279\u5fb5 x 1 \u8207 x 2 \u8207\u76ee\u6a19\u503c\u53c8\u8ff4\u5230\u4e86\u7dda\u6027\u95dc\u4fc2\u3002 Let x 2 = x 1 2 => y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 2 \u540c\u6a23\u7684\u6211\u5011\u518d\u4f86\u770b\u53e6\u4e00\u500b\u4f8b\u5b50\u3002\u6211\u5011\u5982\u679c\u5f15\u5165\u4e86 x 1 \u7684\u4e09\u6b21\u65b9\u7684\u8a71\uff0c\u4ed6\u7684\u65b9\u7a0b\u5f0f\u5982\u4e0b\uff1a y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 1 2 + \u03b2 3 x 1 3 \u540c\u7406\u6211\u5011\u9019\u6642\u4e00\u6a23\u53ef\u4ee5\u5f15\u5165\u65b0\u7684\u7279\u5fb5 x 2 \u7b49\u65bc x 1 \u7684\u4e8c\u6b21\u65b9\uff0c\u4ee5\u53ca x 3 \u7b49\u65bc x 1 \u7684\u4e09\u6b21\u65b9\u3002\u9019\u6a23\u7d93\u904e\u4e00\u500b\u8f49\u63db\u4ee5\u5f8c\u6211\u5011\u7684 y \u503c\u8207\u6240\u6709\u7684\u7279\u5fb5\u9593\u4f9d\u7136\u5b58\u5728\u8457\u7dda\u6027\u95dc\u4fc2\u3002 Let x 2 = x 1 2 and x 3 = x 1 3 => y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 2 + \u03b2 3 x 3 \u9019\u88e1\u505a\u4e00\u500b\u5c0f\u7d50\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e\u5f15\u5165\u8f49\u8b8a\u904e\u5f8c\u7684 x 
\u4f5c\u70ba\u4e00\u500b\u65b0\u7684\u7279\u5fb5\u4f86\u6eff\u8db3\u7dda\u6027\u5047\u8a2d\u3002\u6b64\u6642\u7684\u8ff4\u6b78\u65b9\u7a0b\u5f0f\u5c31\u662f\u4e00\u500b\u591a\u9805\u5f0f\u8ff4\u6b78(polynomial regression)\u3002 Sklearn \u5be6\u4f5c\u591a\u9805\u5f0f\u8ff4\u6b78 \u7531\u65bc Sklearn \u6c92\u6709\u5c01\u88dd\u597d\u7684\u591a\u9805\u5f0f\u8ff4\u6b78\u6a21\u578b\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb\u3002\u4e0d\u904e\u6211\u5011\u53ef\u4ee5\u900f\u904e make_pipeline \u5c07 PolynomialFeatures \u8207 LinearRegression \u5c01\u88dd\u6210\u4e00\u500b\u591a\u9805\u5f0f\u8ff4\u6b78\u6a21\u578b\uff0c\u4e26\u4e14\u4f7f\u7528\u8005\u53ef\u4ee5\u96a8\u610f\u8a2d\u5b9a degree(\u6b21\u65b9)\u503c\u3002 \u6211\u5011\u53ef\u4ee5\u5c0d\u539f\u672c\u7684\u7279\u5fb5\u9032\u884c PolynomialFeatures \u69cb\u9020\u65b0\u6a23\u672c\u7279\u5fb5\u63a1\u3002\u4e26\u5c07\u8f49\u63db\u5f8c\u7684\u7279\u5fb5\u9001\u5230\u7dda\u6027\u8ff4\u6b78\u6a21\u578b\u9032\u884c\u64ec\u5408\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u81ea\u5b9a\u7fa9\u4e00\u500b PolynomialRegression() \u7684\u51fd\u5f0f\uff0c\u4f7f\u7528\u8005\u53ef\u4ee5\u8f38\u5165 degree \u5927\u5c0f\u63a7\u5236\u6a21\u578b\u7684\u5f37\u5ea6\u3002\u5728\u9019\u500b\u51fd\u5f0f\u4e2d\u6211\u5011\u4f7f\u7528 Sklearn \u7684 pipeline \u65b9\u6cd5\u5c07 PolynomialFeatures \u7279\u5fb5\u8f49\u63db\u8207 LinearRegression \u7dda\u6027\u8ff4\u6b78\u6a21\u578b\u5c01\u88dd\u8d77\u4f86\u3002\u53e6\u5916\u4ee5\u4e0b\u7bc4\u4f8b\u662f\u900f\u904e\u81ea\u8a02\u7fa9\u7684 make_data() \u51fd\u5f0f\u7522\u751f\u4e00\u7d44\u96a8\u6a5f\u7684 x \u548c y\u3002\u8a72\u51fd\u5f0f\u4e2d\u53ef\u4ee5\u8a2d\u5b9a\u96a8\u6a5f\u8cc7\u6599\u7684\u6bd4\u6578\uff0c\u4e0b\u9762\u7a0b\u5f0f\u4e2d\u6211\u5011\u5148\u96a8\u6a5f\u5efa\u7acb 100 \u7b46\u6578\u64da\u3002 from sklearn.linear_model import LinearRegression from sklearn.preprocessing import PolynomialFeatures from sklearn.pipeline import make_pipeline plt . style . 
use ( 'seaborn' ) # make_pipeline\u662f\u6307\u53ef\u4ee5\u5c07\u591a\u500bSklearn\u7684function\u4e00\u8d77\u57f7\u884c def PolynomialRegression ( degree = 2 , ** kwargs ): return make_pipeline ( PolynomialFeatures ( degree ), LinearRegression ( ** kwargs )) # \u96a8\u6a5f\u5b9a\u7fa9\u65b0\u7684x,y\u503c def make_data ( N , err = 1 , rseed = 42 ): rng = np . random . RandomState ( rseed ) x = rng . rand ( N , 1 ) ** 2 y = 10 - 1 / ( x . ravel () + 0.1 ) if err > 0 : y += err * rng . randn ( N ) return x , y X , y = make_data ( 100 ) \u8a13\u7df4\u8cc7\u6599\u8207\u6e2c\u8a66\u8cc7\u6599\u90fd\u5efa\u7acb\u5b8c\u6210\u5f8c\u3002\u6211\u5011\u5c31\u53ef\u4ee5\u5c07\u8a13\u7df4\u8cc7\u6599\u4e1f\u5165\u5efa\u7acb\u597d\u7684 PolynomialRegression() \u4e26\u9032\u884c\u6578\u64da\u64ec\u5408\u3002\u4e0b\u9762\u7bc4\u4f8b\u7a0b\u5f0f\u4e2d\u6211\u5011\u6f14\u793a degree \u7b49\u65bc 1\u30013\u30019\uff0c\u4e26\u4f86\u67e5\u770b\u96a8\u8457\u6b21\u65b9\u6578\u7684\u589e\u9577\u5c0d\u65bc\u6a21\u578b\u7684\u64ec\u5408\u7a0b\u5ea6\u7684\u5f71\u97ff\u3002 # \u6e2c\u8a66\u8cc7\u6599\u96c6 x_test = np . linspace ( - 0.1 , 1.1 , 500 )[:, None ] # \u7e6a\u88fd\u771f\u5be6\u7b54\u6848\u7684\u5206\u4f48 plt . scatter ( X . ravel (), y , color = 'black' ) # \u6e2c\u8a66 1,3,7 \u7684degree for degree in [ 1 , 3 , 9 ]: y_test = PolynomialRegression ( degree ) . fit ( X , y ) . predict ( x_test ) plt . plot ( x_test . ravel (), y_test , label = 'degree= {} ' . format ( degree )) plt . xlim ( - 0.1 , 1.0 ) plt . ylim ( - 2 , 12 ) plt . 
legend ( loc = 'best' ) \u5f9e\u8a13\u7df4\u7d50\u679c\u53ef\u4ee5\u767c\u73fe\u96a8\u8457\u6b21\u65b9\u6578 degree \u7684\u589e\u9577\u6a21\u578b\u6703\u8b8a\u5f97\u8d8a\u8907\u96dc\u3002\u540c\u6642\u5c0d\u65bc\u8a13\u7df4\u6578\u64da\u7684\u64ec\u5408\u7d50\u679c\u8d8a\u597d\u3002\u4f46\u662f\u9019\u88e1\u5fc5\u9808\u6ce8\u610f\u4e26\u975e\u8d8a\u5927\u7684 degree \u5c31\u662f\u8d8a\u597d\u7684\uff0c\u56e0\u70ba\u96a8\u8457\u6a21\u578b\u8907\u96dc\u6703\u6709\u904e\u5ea6\u64ec\u5408\u7684\u8de1\u8c61\u3002\u56e0\u6b64\u6211\u5011\u5fc5\u9808\u627e\u51fa\u4e00\u500b\u9069\u7576\u7684 degree \u6578\u503c\u4e26\u8207\u6e2c\u8a66\u96c6\u9a57\u8b49\u8207\u8a55\u4f30\u3002\u76ee\u6a19\u662f\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684 MSE \u5dee\u8ddd\u8981\u8d8a\u5c0f\u8d8a\u597d\u3002\u5982\u679c\u6211\u5011\u4e00\u6627\u7684\u8ffd\u6c42\u8a13\u7df4\u96c6\u7684\u640d\u5931\u6700\u5c0f\u5316\uff0c\u53ef\u80fd\u6703\u5f71\u97ff\u5230\u6e2c\u8a66\u96c6\u7684\u8868\u73fe\u80fd\u529b\u5c0e\u81f4\u9810\u6e2c\u7d50\u679c\u8b8a\u5dee\u3002 Gradient descent (\u68af\u5ea6\u4e0b\u964d\u6cd5) \u63a5\u4e0b\u4f86\u6211\u5011\u4f86\u8a0e\u8ad6\u512a\u5316\u554f\u984c\u7684\u7b2c\u4e8c\u7a2e\u65b9\u6cd5\uff0c\u5c31\u662f\u68af\u5ea6\u4e0b\u964d\u6cd5\u3002\u68af\u5ea6\u4e0b\u964d\u4e0d\u50c5\u9650\u65bc\u7dda\u6027\u8ff4\u6b78\uff0c\u5728\u975e\u7dda\u6027\u548c\u795e\u7d93\u7db2\u7d61\u540c\u6a23\u9069\u7528\u3002\u4e0b\u5716\u4e2d\u6bcf\u4e00\u500b\u9ede\u662f\u8a13\u7df4\u96c6\u7684\u6a23\u672c x \u8ef8\u70ba\u8f38\u5165\u503c y \u8ef8\u70ba\u8f38\u51fa\u503c\u3002\u4e5f\u5c31\u662f\u5e73\u9762\u4e0a\u6bcf\u500b\u9ede x \u90fd\u6703\u6709\u4e00\u500b\u76f8\u5c0d\u61c9 y \u7684\u8f38\u51fa\uff0c\u56e0\u6b64\u6211\u5011\u9700\u8981\u505a\u7684\u4e8b\u60c5\u662f\u70ba\u9019\u4e9b\u9ede\u8a13\u7df4\u4e00\u500b\u6a21\u578b\uff0c\u4f7f\u5f97\u9019\u689d\u76f4\u7dda\u80fd\u5920\u76e1\u53ef\u80fd\u53cd\u6620\u51fa x \u8207 y 
\u4e4b\u9593\u7684\u95dc\u4fc2\u3002\u6b64\u5916\u6211\u5011\u90fd\u77e5\u9053\u5728\u4e00\u500b\u4e8c\u7dad\u7a7a\u9593\u4e2d\u6211\u5011\u80fd\u627e\u5230\u7121\u6578\u689d\u76f4\u7dda\uff0c\u90a3\u6211\u5011\u8a72\u5982\u4f55\u627e\u5230\u9019\u689d\u6700\u4f73\u7684\u76f4\u7dda\u5462\uff1f\u7c21\u55ae\u4f86\u8aaa\u6211\u5011\u7684\u76ee\u6a19\u662f\u8981\u4f7f\u5f97\u9019\u4e9b\u8a13\u7df4\u8cc7\u6599\u4e2d\u7684\u6bcf\u500b\u6a23\u672c\u9ede\u5230\u9019\u4e00\u689d\u76f4\u7dda\u7684\u8ddd\u96e2\u5e73\u65b9\u548c\u8981\u6700\u5c0f\u3002\u56e0\u6b64\u9019\u88e1\u6211\u5011\u5c07\u8a0e\u8ad6\u8a72\u5982\u4f55\u4f7f\u7528\u68af\u5ea6\u4e0b\u964d\u6cd5\u4f86\u6700\u4f73\u5316\u6211\u5011\u7684\u6a21\u578b\u3002\u9996\u5148\u6211\u5011\u5047\u8a2d\u4e00\u500b\u76f4\u7dda\u7684\u65b9\u7a0b\u5f0f\u662f y = \u03b2 0 + \u03b2 1 x\u3002\u90a3\u9996\u5148\u6211\u5011\u53ef\u4ee5\u5148\u96a8\u6a5f\u7684\u7d66\u4e88 \u03b2 0 \u548c \u03b2 1 \u4e00\u500b\u521d\u59cb\u503c\u3002\u4e26\u5f97\u5230\u4e0b\u5716\u4e2d\u7684\u7d50\u679c\uff0c\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u9019\u4e00\u689d\u76f4\u7dda\u4e26\u4e0d\u80fd\u53cd\u6620\u51fa x \u548c y \u7684\u95dc\u806f\u6027\u3002 \u5982\u679c\u6211\u5011\u4e0d\u65b7\u7684\u8fed\u4ee3\uff0c\u6bcf\u4e00\u6b21\u7684\u8fed\u4ee3\u90fd\u8b93\u9019\u4e00\u689d\u76f4\u7dda\u671d\u8457\u66f4\u7b26\u5408\u6578\u64da\u9ede\u7684\u65b9\u5411\u79fb\u52d5\u4e00\u9ede\uff0c\u90a3\u9ebc\u7d93\u904e\u8a31\u591a\u6b21\u7684\u66f4\u65b0\u6211\u5011\u5c31\u53ef\u4ee5\u5f97\u5230\u6700\u4f73\u7684\u7d50\u679c\u3002\u7c21\u55ae\u4f86\u8aaa\u5c31\u662f\u5728\u6bcf\u6b21\u7684\u8fed\u4ee3\u8981\u66f4\u65b0\u6240\u6709\u7684\u53c3\u6578\uff0c\u4f8b\u5982\uff1a \u03b2 0 \u548c \u03b2 1 \uff0c\u76f4\u5230\u5f97\u5230\u6700\u5c0f\u7684 MSE 
\u6216\u662f\u9810\u5b9a\u7684\u8fed\u4ee3\u6b21\u6578\u3002\u4ee5\u4e0b\u7684\u516c\u5f0f\u5c31\u662f\u68af\u5ea6\u4e0b\u964d\u6cd5\u7684\u8868\u9054\u5f0f\u3002\u5b83\u53cd\u6620\u7684\u662f\u6bcf\u6b21\u8fed\u4ee3\uff0c\u6211\u5011\u7684 \u03b2 0 \u548c \u03b2 1 \u9019\u4e9b\u53c3\u6578\u662f\u5982\u4f55\u8abf\u6574\u7684\u3002\u6211\u5011\u53ef\u4ee5\u5f9e\u9019\u500b\u516c\u5f0f\u5f97\u77e5\uff0c\u4ed6\u662f\u5c0d\u640d\u5931\u51fd\u6578\u6c42\u4e86\u67d0\u4e00\u500b\u7279\u5b9a\u53c3\u6578\u7684\u504f\u5c0e\u3002\u9019\u5c31\u662f\u6240\u8b02\u7684\u68af\u5ea6\uff0c\u6211\u5011\u671d\u8457\u68af\u5ea6\u7684\u53cd\u65b9\u5411\u5728\u66f4\u65b0\u3002\u7136\u800c\u6bcf\u4e00\u6b21\u8981\u66f4\u65b0\u591a\u5927\u53ef\u4ee5\u4f9d\u9760 \u03b7\uff08(eta) \u4f86\u63a7\u5236\uff0c\u56e0\u6b64\u6211\u5011\u7b97\u51fa\u4f86\u7684\u68af\u5ea6\u9084\u6703\u4e58\u4e0a\u4e00\u500b\u5b78\u7fd2\u901f\u7387\u4f86\u9632\u6b62\u66f4\u65b0\u6b65\u4f10\u592a\u5927\u800c\u5c0e\u81f4\u627e\u4e0d\u5230\u89e3\u3002\u6240\u4ee5 \u03b7 \u7684\u5927\u5c0f\u8981\u9069\u4e2d\u4ee5\u514d\u5f71\u97ff\u5230\u6a21\u578b\u6700\u7d42\u7684\u6536\u6582\u3002 \u6b64\u5916\u9019\u500b\u6a21\u578b\u5982\u679c\u900f\u904e\u68af\u5ea6\u4e0b\u964d\u6cd5\u9084\u6709\u4e00\u500b\u7f3a\u9ede\uff0c\u90a3\u5c31\u662f\u7576\u6211\u5011\u7684\u640d\u5931\u51fd\u6578\u4e0d\u662f\u4e00\u500b\u51f8\u51fd\u6578(convex function) \u7684\u6642\u5019\u5b83\u5c31\u6703\u5b58\u5728\u8a31\u591a\u500b\u6700\u4f4e\u9ede\uff0c\u9032\u800c\u5c0e\u81f4\u5728\u6211\u5011\u9078\u64c7\u4e0d\u540c\u7684 \u03b2 0 \u548c \u03b2 1 \u4f5c\u70ba\u521d\u59cb\u503c\u7684\u6642\u5019\u5f88\u53ef\u80fd\u6703\u6536\u6582\u65bc\u4e0d\u540c\u7684\u5c40\u90e8\u6700\u4f73\u89e3(local optimum)\u3002\u4e5f\u5c31\u662f\u8aaa\u6211\u5011\u6c42\u5f97\u7684\u6700\u4f73\u7684\u6a21\u578b\u5f88\u6709\u6a5f\u6703\u662f\u5c40\u90e8\u6700\u4f73\u89e3\u800c\u4e0d\u662f\u5168\u5c40\u6700\u4f73\u89e3(global optimum)\u3002 \u4f7f\u7528 Sklearn SGDRegressor 
Sklearn \u63d0\u4f9b\u4e86 SGDRegressor \u4e26\u5be6\u73fe\u4e86\u96a8\u6a5f\u68af\u5ea6\u4e0b\u964d\u5b78\u7fd2\u3002\u4f60\u53ef\u80fd\u6703\u554f\u68af\u5ea6\u4e0b\u964d\u8207\u96a8\u6a5f\u68af\u5ea6\u4e0b\u964d\u5169\u8005\u5dee\u5225\u5728\u54ea\uff1f\u7c21\u55ae\u4f86\u8aaa\u4e00\u822c\u7684\u68af\u5ea6\u4e0b\u964d\u6cd5\u662f\u4e00\u6b21\u7528\u5168\u90e8\u8a13\u7df4\u96c6\u7684\u6578\u64da\u8a08\u7b97\u640d\u5931\u51fd\u6578\u7684\u68af\u5ea6\uff0c\u7136\u5f8c\u505a\u4e00\u6b21\u53c3\u6578\u7684\u66f4\u65b0\u4fee\u6b63\u3002\u800c\u96a8\u6a5f\u68af\u5ea6\u4e0b\u964d\u6cd5\u5c31\u662f\u4e00\u6b21\u8dd1\u4e00\u500b\u6a23\u672c\u6216\u662f\u5c0f\u6279\u6b21\u6a23\u672c\uff0c\u7136\u5f8c\u7b97\u51fa\u4e00\u6b21\u68af\u5ea6\u4e26\u66f4\u65b0\u3002\u800c\u6240\u8b02\u7684\u96a8\u6a5f\u5c31\u662f\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u96a8\u6a5f\u5730\u62bd\u53d6\u6a23\u672c\uff0c\u6240\u4ee5\u624d\u6703\u7a31\u70ba\u96a8\u6a5f\u68af\u5ea6\u4e0b\u964d\u6cd5\u3002 import numpy as np from sklearn.linear_model import SGDRegressor from sklearn.metrics import mean_squared_error # \u96a8\u6a5f\u7522\u751f\u4e00\u500b\u7279\u5fb5\u7684X\u8207\u8f38\u51fay X , y = make_data ( 100 ) # \u5efa\u7acb SGDRegressor \u4e26\u8a2d\u7f6e\u8d85\u53c3\u6578 regModel = SGDRegressor ( max_iter = 100 ) # \u8a13\u7df4\u6a21\u578b regModel . fit ( X , y ) # \u5efa\u7acb\u6e2c\u8a66\u8cc7\u6599 x_test = np . linspace ( - 0.05 , 1 , 500 )[:, None ] # \u9810\u6e2c\u6e2c\u8a66\u96c6 y_test = regModel . predict ( x_test ) # \u9810\u6e2c\u8a13\u7df4\u96c6 y_pred = regModel . predict ( X ) # \u8996\u89ba\u5316\u9810\u6e2c\u7d50\u679c plt . scatter ( X , y ) plt . plot ( x_test . ravel (), y_test , color = \"#d62728\" ) plt . xlabel ( 'x' ) plt . ylabel ( 'y' ) plt . text ( 0 , 10 , 'Loss(MSE)= %.3f ' % mean_squared_error ( y_pred , y ), fontdict = { 'size' : 15 , 'color' : 'red' }) plt . 
show () \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 8] \u7dda\u6027\u8ff4\u6b78 (Linear Regression)"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#day-8-linear-regression","text":"## \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 - \u8a8d\u8b58\u7dda\u6027\u8ff4\u6b78 - \u900f\u904e\u6a5f\u5668\u5b78\u7fd2\u4f86\u627e\u51fa\u4e00\u689d\u51fd\u5f0f\uff0c\u4f86\u6700\u4f73\u5316\u6a21\u578b - \u5169\u7a2e\u6c42\u89e3\u65b9\u6cd5 - \u7dda\u6027\u8ff4\u6b78\u7a0b\u5f0f\u624b\u628a\u624b - \u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u3001\u591a\u5143\u8ff4\u6b78\u3001\u975e\u7dda\u6027\u8ff4\u6b78 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"[Day 8] \u7dda\u6027\u8ff4\u6b78 (Linear Regression)"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#_1","text":"\u7dda\u6027\u8ff4\u6b78\u662f\u7d71\u8a08\u4e0a\u5728\u627e\u591a\u500b\u81ea\u8b8a\u6578\u548c\u4f9d\u8b8a\u6578\u4e4b\u9593\u7684\u95dc\u4fc2\u6240\u5efa\u51fa\u4f86\u7684\u6a21\u578b\u3002\u53ea\u6709\u4e00\u500b\u81ea\u8b8a\u6578(x)\u548c\u4e00\u500b\u4f9d\u8b8a\u6578(y)\u7684\u60c5\u5f62\u7a31\u70ba\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u5927\u65bc\u4e00\u500b\u81ea\u8b8a\u6578(x 1 ,x 2 ,...)\u7684\u60c5\u5f62\u7a31\u70ba\u591a\u5143\u8ff4\u6b78\u3002 \u4e00\u500b\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78: y=ax+b\uff0c\u5176\u4e2d b\uff1a\u622a\u8ddd(Intercept)\uff0ca\uff1a\u659c\u7387(Slope) \u70ba x \u8b8a\u52d5\u4e00\u500b\u55ae\u4f4d y \u8b8a\u52d5\u7684\u91cf\uff0c\u5982\u4e0b\u5716: \u8ff4\u6b78\u5206\u6790\u7684\u76ee\u6a19\u51fd\u6578\u6216\u7a31\u640d\u5931\u51fd\u6578(loss function)\u5c31\u662f\u5e0c\u671b\u627e\u5230\u7684\u6a21\u578b\u6700\u7d42\u7684\u6b98\u5dee\u8d8a\u5c0f\u8d8a\u597d\uff0c\u4f86\u627e\u53c3\u6578 a \u548c b\u3002","title":"\u8a8d\u8b58\u7dda\u6027\u8ff4\u6b78"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#_2","text":"\u7dda\u6027\u6a21\u578b\u6700\u5e38\u898b\u7684\u89e3\u6cd5\u6709\u5169\u7a2e\uff0c\u5206\u5225\u70ba 
Closed-form (\u9589\u5f0f\u89e3) \u8207\u68af\u5ea6\u4e0b\u964d (Gradient descent)\u3002\u7576\u7279\u5fb5\u5c11\u6642\u4f7f\u7528 Closed-form \u8f03\u70ba\u9069\u5408\uff0c\u4f7f\u7528\u4e0b\u9762\u516c\u5f0f\u4f86\u6c42\u51fa \u03b8 \u503c\u3002\u6211\u5011\u53c8\u53ef\u4ee5\u8aaa\u7dda\u6027\u6a21\u578b\u7684\u6700\u5c0f\u5e73\u65b9\u6cd5\u7684\u89e3\u5373\u70ba Closed-form\u3002\u82e5\u7576\u662f\u8907\u96dc\u7684\u554f\u984c\u6642 Gradient descen \u8f03\u80fd\u89e3\u6c7a\uff0c\u5176\u539f\u56e0\u662f\u5927\u90e8\u5206\u7684\u554f\u984c\u5176\u5be6\u662f\u6c92\u6709\u516c\u5f0f\u89e3\u7684\u3002\u6211\u5011\u53ea\u80fd\u6c42\u51fa\u4e00\u500b\u51fd\u6578 f(x) \u4f7f\u5176\u8aa4\u5dee\u6700\u5c0f\u8d8a\u597d\u3002 Closed-form Gradient descent ## Least Square Method (\u6700\u5c0f\u5e73\u65b9\u6cd5) \u5047\u8a2d\u4e00\u500b\u5730\u5340\u7684\u623f\u50f9\u8207\u576a\u6578\u662f\u5448\u7dda\u6027\u95dc\u4fc2\uff0c\u4e26\u4ee5\u4e0b\u5716\u4e2d\u7684\u4e09\u500b\u9ede\u8868\u793a\u3002\u5982\u679c\u6211\u5011\u60f3\u900f\u904e\u623f\u5b50\u7684\u576a\u6578\u4f86\u9810\u6e2c\u623f\u50f9\uff0c\u90a3\u9ebc\u6211\u5011\u7684\u76ee\u6a19\u5c31\u662f\u627e\u5230\u4e00\u689d\u76f4\u7dda\uff0c\u4e26\u8207\u5ea7\u6a19\u5e73\u9762\u4e0a\u9019\u4e09\u500b\u9ede\u7684\u5dee\u8ddd\u8d8a\u5c0f\u8d8a\u597d\u3002\u90a3\u9019\u689d\u76f4\u7dda\u8a72\u600e\u9ebc\u627e\u5462\uff1f\u9996\u5148\u6211\u5011\u96a8\u6a5f\u627e\u4e00\u689d\u76f4\u7dda\uff0c\u4e26\u8a08\u7b97\u9019\u4e09\u9ede\u7684 loss\u3002\u640d\u5931\u51fd\u6578\u53ef\u4ee5\u81ea\u5df1\u5b9a\u7fa9\uff0c\u5047\u8a2d\u6211\u5011\u4f7f\u7528 MSE \u5747\u65b9\u8aa4\u5dee\u4f86\u8a08\u7b97\u3002\u900f\u904e\u4e00\u7cfb\u5217\u8a08\u7b97\u6211\u5011\u5f97\u5230\u4e00\u500b loss \u5373\u70ba MSE \u503c\u3002\u63a5\u8457\u6211\u5011\u5c07\u9019\u500b\u76f4\u7dda\u7a0d\u7a0d\u7684\u8f49\u4e00\u500b\u89d2\u5ea6\u5f8c\u53c8\u53ef\u4ee5\u8a08\u7b97\u4e00\u500b\u65b0\u7684 MSE\uff0c\u6b64\u523b\u6211\u5011\u53ef\u4ee5\u767c\u73fe MSE 
\u503c\u53c8\u6bd4\u525b\u525b\u66f4\u5c0f\u4e86\u3002\u4e5f\u5c31\u662f\u8aaa\u9019\u4e00\u689d\u65b0\u7684\u76f4\u7dda\u80fd\u5920\u66f4\u6cd5\u61c9\u51fa\u8a13\u7df4\u96c6\u4e2d A\u3001B\u3001C \u7684\u6578\u64da\u9ede\u6240\u53cd\u6620\u7684\u623f\u5c4b\u576a\u6578\u8207\u623f\u50f9\u4e4b\u9593\u7684\u7dda\u6027\u95dc\u4fc2\u3002 \u7c21\u55ae\u4f86\u8aaa\u6211\u5011\u5728\u4e00\u500b\u4e8c\u7dad\u7a7a\u9593\u4e2d\uff0c\u6211\u5011\u53ef\u4ee5\u627e\u5230\u7121\u6578\u689d\u76f4\u7dda\u3002\u73fe\u5728\u6211\u5011\u80fd\u505a\u7684\u4e8b\u60c5\u5c31\u662f\u5f9e\u9019\u7121\u6578\u689d\u76f4\u7dda\u4e2d\u9078\u51fa\u4e00\u689d\u6700\u4f73\u7684\u7576\u4f5c\u6211\u5011\u7684\u9810\u6e2c\u6a21\u578b\uff0c\u540c\u6642\u5b83\u9762\u5c0d\u9019\u4e09\u9ede\u7684\u8aa4\u5dee\u662f\u8981\u6700\u5c0f\u7684\u3002\u56e0\u6b64\u6211\u5011\u7684\u76ee\u6a19\u5c31\u662f\u8981\u6700\u5c0f\u5316 MSE \u4e5f\u5c31\u662f\u6240\u8b02\u7684\u640d\u5931\u51fd\u6578 (loss function)\u3002\u6240\u4ee5\u6574\u500b\u7dda\u6027\u8ff4\u6b78\u7684\u76ee\u6a19\u5c31\u662f\u6700\u5c0f\u5316\u6211\u5011\u7684\u640d\u5931\u51fd\u6578\uff0c\u5176\u4e2d\u4e00\u500b\u89e3\u6cd5\u5c31\u662f\u6700\u5c0f\u5e73\u65b9\u6cd5\u3002\u56e0\u70ba MSE \u7b49\u65bc 1/n \u500d\u7684\u6b8b\u5dee\u5e73\u65b9\u548c (RSS)\uff0c\u5176\u4e2d\u5206\u6bcd n \u70ba\u5e38\u6578\uff0c\u4e0d\u5f71\u97ff\u6975\u5c0f\u5316\u6545\u62ff\u6389\u3002\u56e0\u6b64\u6700\u7d42\u7684\u6c42\u89e3\u662f\u6eff\u8db3\u6700\u5c0f\u5316\u5e73\u65b9\u548c\uff0c\u4f7f\u5176\u6700\u5c0f\u5316\u3002\u7d93\u904e\u6578\u5b78\u63a8\u5c0e\u5f8c\uff0c\u7c21\u5316\u7684\u516c\u5f0f\u5982\u4e0b\uff1a","title":"\u5169\u7a2e\u6c42\u89e3\u65b9\u6cd5"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#_3","text":"\u57fa\u65bc\u4e0a\u9762\u7684\u516c\u5f0f\u6211\u5011\u60f3\u627e\u51fa\u4e00\u7d44\u53c3\u6578\u6b0a\u91cd \u03b8\u3002\u4e5f\u5c31\u662f\u4e0b\u5716\u554f\u984c\u4e2d\u7684 a (\u03b8 0 )\u3001b (\u03b8 1 ) 
\u5169\u53c3\u6578\uff0c\u4f7f\u5f97\u5e73\u9762\u4e0a\u9019\u4e09\u9ede\u5e73\u65b9\u548c\u6709\u6975\u5c0f\u503c\u3002\u9019\u500b\u51fd\u5f0f\u5c0d \u03b8 0 , \u03b8 1 \u504f\u505a\u5fae\u5206\u8a2d\u4ed6\u5011\u70ba0\uff0c\u63a5\u8457\u6211\u5011\u5c0d\u65b9\u7a0b\u5f0f\u6c42\u89e3\u3002 \u6b64\u51fd\u5f0f\u53ea\u6709\u6975\u5c0f\u503c\uff0c\u56e0\u6b64\u6211\u5011\u5f97\u5230\u7684 \u03b8 0 , \u03b8 1 \u6700\u5c0f\u6975\u503c\u7684\u89e3\u3002","title":"\u5c0f\u8a66\u8eab\u624b"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#_4","text":"","title":"\u7bc4\u4f8b\u7a0b\u5f0f (\u623f\u50f9\u9810\u6e2c)"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#_5","text":"\u6211\u5011\u900f\u904e Sklearn \u6240\u63d0\u4f9b\u7684\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6\u9032\u884c\u7dda\u6027\u8ff4\u6b78\u6a21\u578b\u5efa\u6a21\uff0c\u4e26\u63a1\u7528\u6700\u5c0f\u5e73\u6cd5\u3002\u9996\u5148\u70ba\u4e86\u8981\u9a57\u8b49\u6211\u5011\u4e0a\u9762\u7684\u516c\u5f0f\uff0c\u56e0\u6b64\u6211\u5011\u5148\u5229\u7528 Numpy \u5957\u4ef6\u81ea\u5df1\u624b\u523b\u505a\u4e00\u7cfb\u5217\u7684\u77e9\u9663\u904b\u7b97\u6c42\u51fa\u6bcf\u4e00\u9805\u7684\u4fc2\u6578\u8207\u622a\u8ddd\u3002 import numpy as np import pandas as pd from sklearn.metrics import mean_squared_error from sklearn.datasets import load_boston # \u8f09\u5165 Sklearn \u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6 13\u500b\u8f38\u5165\u7279\u5fb5 1\u500b\u8f38\u51fa\u7279\u5fb5 boston_dataset = load_boston () # \u8f38\u5165\u7279\u5fb5\u517113\u500b X = boston_dataset . data # \u8a2d\u5b9a\u622a\u8ddd\u9805 b \u6b0a\u91cd\u503c\u70ba 1 b = np . ones (( X . shape [ 0 ], 1 )) # \u6dfb\u52a0\u5e38\u6578\u9805\u7279\u5fb5\uff0c\u6700\u7d42\u6709 13+1 \u500b\u8f38\u5165\u7279\u5fb5 X = np . hstack (( X , b )) # \u8f38\u51fa(\u623f\u50f9) y = boston_dataset . target # \u8a08\u7b97 Beta (@ \u70ba numpy \u4e2d 2-D arrays \u7684\u77e9\u9663\u4e58\u6cd5) Beta = np . linalg . inv ( X . T @ X ) @ X . 
T @ y y_pred = X @ Beta # MSE: 21.8948311817292 print ( 'MSE:' , mean_squared_error ( y_pred , y )) \u8a08\u7b97\u51fa\u4f86 Beta \u5f8c\u6211\u5011\u518d\u628a\u6240\u6709\u7684 X \u5e36\u5165\u4e26\u505a\u8a08\u7b97\uff0c\u7b97\u51fa\u4f86\u7684\u7d50\u679c MSE \u70ba 21.89\u3002\u6700\u5f8c\u6211\u5011\u53ef\u4ee5\u8a66\u8457\u628a Beta \u8b8a\u6578\u5217\u5370\u51fa\u4f86\u3002\u7e3d\u5171\u6703\u6709 14 \u500b\u53c3\u6578\uff0c\u7531 13 \u500b\u8f38\u5165\u7279\u5fb5\u4fc2\u6578\u8207\u6700\u5f8c\u4e00\u9805\u622a\u8ddd\u6240\u7d44\u6210\u7684\u3002 \u8f38\u51fa\u7d50\u679c\uff1a array([-1.08011358e-01, 4.64204584e-02, 2.05586264e-02, 2.68673382e+00, -1.77666112e+01, 3.80986521e+00, 6.92224640e-04, -1.47556685e+00, 3.06049479e-01, -1.23345939e-02, -9.52747232e-01, 9.31168327e-03, -5.24758378e-01, 3.64594884e+01])","title":"\u624b\u523b\u7dda\u6027\u8ff4\u6b78"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#sklearn-linearregression","text":"\u7dda\u6027\u8ff4\u6b78\u7c21\u55ae\u4f86\u8aaa\uff0c\u5c31\u662f\u5c07\u8907\u96dc\u7684\u8cc7\u6599\u6578\u64da\uff0c\u64ec\u548c\u81f3\u4e00\u689d\u76f4\u7dda\u4e0a\uff0c\u5c31\u80fd\u65b9\u4fbf\u9810\u6e2c\u672a\u4f86\u7684\u8cc7\u6599\u3002\u63a5\u4e0b\u4f86\u6211\u5011\u4e00\u6a23\u4f7f\u7528\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6\uff0c\u4e26\u4f7f\u7528 Sklearn \u63d0\u4f9b\u7684 LinearRegression \u4f86\u6c42\u89e3\u3002 Parameters: - fit_intercept: \u662f\u5426\u6709\u622a\u8ddd\uff0c\u5982\u679c\u6c92\u6709\u5247\u76f4\u7dda\u904e\u539f\u9ede\u3002 Attributes: - coef_: \u53d6\u5f97\u4fc2\u6578\u3002 - intercept_: \u53d6\u5f97\u622a\u8ddd\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: R2 score \u6a21\u578b\u8a55\u4f30\u3002 import numpy as np import pandas as pd from sklearn.linear_model import LinearRegression from sklearn.metrics import mean_squared_error from sklearn.datasets import load_boston # 
\u8f09\u5165 Sklearn \u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6 13\u500b\u8f38\u5165\u7279\u5fb5 1\u500b\u8f38\u51fa\u7279\u5fb5 boston_dataset = load_boston () # \u8f38\u5165\u7279\u5fb5\u517113\u500b X = boston_dataset . data # \u8f38\u51fa(\u623f\u50f9) y = boston_dataset . target # \u8a13\u7df4\u6a21\u578b linearModel = LinearRegression () linearModel . fit ( X , y ) y_pred = linearModel . predict ( X ) # 21.894831181729202 print ( 'MSE:' , mean_squared_error ( y_pred , y )) Sklearn \u7684 LinearRegression \u6a21\u578b\u4e5f\u662f\u63a1\u7528\u5c0f\u5e73\u65b9\u6cd5\u6c42\u89e3\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u5176 MSE \u8207\u7a0d\u65e9\u624b\u523b\u7684\u65b9\u6cd5\u76f8\u7576\u5f88\u63a5\u8fd1\u3002\u53e6\u5916 Sklearn \u6a21\u578b\u540c\u6642\u4e5f\u63d0\u4f9b\u4e86 coef_ \u548c intercept_ \u5169\u500b\u5c6c\u6027\u53ef\u4ee5\u53d6\u5f97\u6a21\u578b\u7684\u7279\u5fb5\u4fc2\u6578\u8207\u622a\u8ddd\u3002","title":"\u4f7f\u7528 Sklearn LinearRegression"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#_6","text":"\u5c0d\u65bc\u7dda\u6027\u8ff4\u6b78\u4f86\u8aaa\uff0c\u8cc7\u6599\u90fd\u662f\u5f88\u5747\u52fb\u5730\u5206\u5e03\u5728\u4e00\u689d\u76f4\u7dda\u4e0a\uff0c\u4f46\u73fe\u5be6\u7684\u8cc7\u6599\u5f80\u5f80\u662f\u975e\u7dda\u6027\u7684\u5206\u4f48\u3002\u5982\u679c\u6211\u5011\u4e00\u6a23\u4f7f\u7528\u4e0a\u8ff0\u65b9\u6cd5\u53d6\u5f97\u7dda\u6027\u6a21\u578b\uff0c\u5728\u5be6\u969b\u5834\u57df\u4e0a\u9810\u6e2c\u6548\u679c\u53ef\u80fd\u4e26\u4e0d\u5927\u3002 \u591a\u9805\u5f0f\u8ff4\u6b78\u4e2d\uff0c\u6578\u64da\u4e0d\u592a\u5177\u6709\u7dda\u6027\u95dc\u4fc2\uff0c\u56e0\u6b64\u61c9\u5c0b\u627e\u4e00\u4e9b\u975e\u7dda\u6027\u66f2\u7dda\u53bb\u64ec\u5408\u3002\u5c0d\u65bc\u4ee5\u4e0a\u7684\u6578\u64da\uff0c\u539f\u672c\u662f\u53ea\u6709\u4e00\u500b x 
\u7279\u5fb5\uff0c\u4f46\u662f\u6211\u5011\u53ef\u4ee5\u5efa\u69cb\u8a31\u591a\u65b0\u7684\u7279\u5fb5\u3002\u5982\u4e0b\u5716\uff0c\u7528\u4e00\u689d\u4e09\u6b21\u66f2\u7dda\u53bb\u64ec\u5408\u6578\u64da\u6548\u679c\u66f4\u597d\u3002\u6211\u5011\u5c07\u4e09\u6b21\u51fd\u6578\u770b\u6210 ax 3 +bx 2 +cx+d\u3002\u9019\u6a23\u5c31\u53c8\u8b8a\u6210\u89e3\u591a\u5143\uff0c\u5176\u6211\u5011\u5c31\u662f\u8981\u627e\u51fa a\u3001b\u3001c\u3001d \u4f7f\u5176\u640d\u5931\u51fd\u6578\u6700\u5c0f\u3002","title":"\u591a\u9805\u5f0f\u7684\u8ff4\u6b78\u6a21\u578b"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#_7","text":"\u5f9e\u4e0a\u8ff0\u554f\u984c\u4e2d\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7dda\u6027\u8ff4\u6b78\u5728\u5be6\u52d9\u4e0a\u6240\u9762\u81e8\u7684\u554f\u984c\u3002\u9996\u5148\u6211\u5011\u4f86\u8ff4\u9867\u4e00\u4e0b\u7a0d\u65e9\u6240\u63d0\u5230\u7684\u7dda\u6027\u65b9\u7a0b\u5f0f\uff0c\u9019\u7d44\u7dda\u6027\u65b9\u7a0b\u5f0f\u8aaa\u660e\u4e86\u6bcf\u500b\u7279\u5fb5 x \u4e00\u6b21\u65b9\u8207\u76ee\u6a19\u503c\u662f\u6709\u4e00\u500b\u7dda\u6027\u7684\u95dc\u4fc2\u3002 y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 2 + ... 
+ \u03b2 n x n \u63a5\u8457\u6211\u5011\u518d\u4f86\u770b\u4e00\u4e0b\u53e6\u4e00\u500b\u4f8b\u5b50\uff0c\u6bd4\u5982\u8aaa\u7279\u5fb5 x 1 \u8207\u76ee\u6a19\u503c\u5b58\u5728\u8457\u4ee5\u4e0b\u7684\u95dc\u4fc2\u3002\u6211\u5011\u767c\u73fe\u9019\u7d44\u65b9\u7a0b\u5f0f\u5df2\u7d93\u4e0d\u662f\u4e00\u500b\u7dda\u6027\u95dc\u4fc2\u4e86\uff0c\u56e0\u70ba\u4ed6\u6709\u4e86 x 1 \u7684\u4e8c\u6b21\u65b9\u3002 y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 1 2 \u90a3\u9ebc\u8a72\u600e\u9ebc\u505a\u6211\u5011\u624d\u80fd\u53c8\u628a\u5b83\u8f49\u63db\u6210\u7dda\u6027\u95dc\u4fc2\u5462\uff1f\u9019\u6642\u5019\u6211\u5011\u5c31\u53ef\u4ee5\u7528\u4e00\u500b\u65b0\u7684\u7279\u5fb5 x 2 \u3002\u6211\u5011\u8b93 x 2 \u7b49\u65bc x 1 \u7684\u5e73\u65b9\uff0c\u9019\u6a23\u6211\u5011\u518d\u628a x 2 \u5e36\u8ff4\u539f\u65b9\u7a0b\u5f0f\u4e2d\u3002\u6b64\u6642\u9019\u5169\u500b\u7279\u5fb5 x 1 \u8207 x 2 \u8207\u76ee\u6a19\u503c\u53c8\u8ff4\u5230\u4e86\u7dda\u6027\u95dc\u4fc2\u3002 Let x 2 = x 1 2 => y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 2 \u540c\u6a23\u7684\u6211\u5011\u518d\u4f86\u770b\u53e6\u4e00\u500b\u4f8b\u5b50\u3002\u6211\u5011\u5982\u679c\u5f15\u5165\u4e86 x 1 \u7684\u4e09\u6b21\u65b9\u7684\u8a71\uff0c\u4ed6\u7684\u65b9\u7a0b\u5f0f\u5982\u4e0b\uff1a y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 1 2 + \u03b2 3 x 1 3 \u540c\u7406\u6211\u5011\u9019\u6642\u4e00\u6a23\u53ef\u4ee5\u5f15\u5165\u65b0\u7684\u7279\u5fb5 x 2 \u7b49\u65bc x 1 \u7684\u4e8c\u6b21\u65b9\uff0c\u4ee5\u53ca x 3 \u7b49\u65bc x 1 \u7684\u4e09\u6b21\u65b9\u3002\u9019\u6a23\u7d93\u904e\u4e00\u500b\u8f49\u63db\u4ee5\u5f8c\u6211\u5011\u7684 y \u503c\u8207\u6240\u6709\u7684\u7279\u5fb5\u9593\u4f9d\u7136\u5b58\u5728\u8457\u7dda\u6027\u95dc\u4fc2\u3002 Let x 2 = x 1 2 and x 3 = x 1 3 => y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 2 + \u03b2 3 x 3 \u9019\u88e1\u505a\u4e00\u500b\u5c0f\u7d50\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e\u5f15\u5165\u8f49\u8b8a\u904e\u5f8c\u7684 x 
\u4f5c\u70ba\u4e00\u500b\u65b0\u7684\u7279\u5fb5\u4f86\u6eff\u8db3\u7dda\u6027\u5047\u8a2d\u3002\u6b64\u6642\u7684\u8ff4\u6b78\u65b9\u7a0b\u5f0f\u5c31\u662f\u4e00\u500b\u591a\u9805\u5f0f\u8ff4\u6b78(polynomial regression)\u3002","title":"\u7dda\u6027\u6a21\u578b\u7684\u64f4\u5c55"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#sklearn","text":"\u7531\u65bc Sklearn \u6c92\u6709\u5c01\u88dd\u597d\u7684\u591a\u9805\u5f0f\u8ff4\u6b78\u6a21\u578b\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb\u3002\u4e0d\u904e\u6211\u5011\u53ef\u4ee5\u900f\u904e make_pipeline \u5c07 PolynomialFeatures \u8207 LinearRegression \u5c01\u88dd\u6210\u4e00\u500b\u591a\u9805\u5f0f\u8ff4\u6b78\u6a21\u578b\uff0c\u4e26\u4e14\u4f7f\u7528\u8005\u53ef\u4ee5\u96a8\u610f\u8a2d\u5b9a degree(\u6b21\u65b9)\u503c\u3002 \u6211\u5011\u53ef\u4ee5\u5c0d\u539f\u672c\u7684\u7279\u5fb5\u9032\u884c PolynomialFeatures \u69cb\u9020\u65b0\u6a23\u672c\u7279\u5fb5\u63a1\u3002\u4e26\u5c07\u8f49\u63db\u5f8c\u7684\u7279\u5fb5\u9001\u5230\u7dda\u6027\u8ff4\u6b78\u6a21\u578b\u9032\u884c\u64ec\u5408\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u81ea\u5b9a\u7fa9\u4e00\u500b PolynomialRegression() \u7684\u51fd\u5f0f\uff0c\u4f7f\u7528\u8005\u53ef\u4ee5\u8f38\u5165 degree \u5927\u5c0f\u63a7\u5236\u6a21\u578b\u7684\u5f37\u5ea6\u3002\u5728\u9019\u500b\u51fd\u5f0f\u4e2d\u6211\u5011\u4f7f\u7528 Sklearn \u7684 pipeline \u65b9\u6cd5\u5c07 PolynomialFeatures \u7279\u5fb5\u8f49\u63db\u8207 LinearRegression \u7dda\u6027\u8ff4\u6b78\u6a21\u578b\u5c01\u88dd\u8d77\u4f86\u3002\u53e6\u5916\u4ee5\u4e0b\u7bc4\u4f8b\u662f\u900f\u904e\u81ea\u8a02\u7fa9\u7684 make_data() \u51fd\u5f0f\u7522\u751f\u4e00\u7d44\u96a8\u6a5f\u7684 x \u548c y\u3002\u8a72\u51fd\u5f0f\u4e2d\u53ef\u4ee5\u8a2d\u5b9a\u96a8\u6a5f\u8cc7\u6599\u7684\u6bd4\u6578\uff0c\u4e0b\u9762\u7a0b\u5f0f\u4e2d\u6211\u5011\u5148\u96a8\u6a5f\u5efa\u7acb 100 \u7b46\u6578\u64da\u3002 from sklearn.linear_model import LinearRegression from sklearn.preprocessing import PolynomialFeatures from sklearn.pipeline import 
make_pipeline plt . style . use ( 'seaborn' ) # make_pipeline\u662f\u6307\u53ef\u4ee5\u5c07\u591a\u500bSklearn\u7684function\u4e00\u8d77\u57f7\u884c def PolynomialRegression ( degree = 2 , ** kwargs ): return make_pipeline ( PolynomialFeatures ( degree ), LinearRegression ( ** kwargs )) # \u96a8\u6a5f\u5b9a\u7fa9\u65b0\u7684x,y\u503c def make_data ( N , err = 1 , rseed = 42 ): rng = np . random . RandomState ( rseed ) x = rng . rand ( N , 1 ) ** 2 y = 10 - 1 / ( x . ravel () + 0.1 ) if err > 0 : y += err * rng . randn ( N ) return x , y X , y = make_data ( 100 ) \u8a13\u7df4\u8cc7\u6599\u8207\u6e2c\u8a66\u8cc7\u6599\u90fd\u5efa\u7acb\u5b8c\u6210\u5f8c\u3002\u6211\u5011\u5c31\u53ef\u4ee5\u5c07\u8a13\u7df4\u8cc7\u6599\u4e1f\u5165\u5efa\u7acb\u597d\u7684 PolynomialRegression() \u4e26\u9032\u884c\u6578\u64da\u64ec\u5408\u3002\u4e0b\u9762\u7bc4\u4f8b\u7a0b\u5f0f\u4e2d\u6211\u5011\u6f14\u793a degree \u7b49\u65bc 1\u30013\u30019\uff0c\u4e26\u4f86\u67e5\u770b\u96a8\u8457\u6b21\u65b9\u6578\u7684\u589e\u9577\u5c0d\u65bc\u6a21\u578b\u7684\u64ec\u5408\u7a0b\u5ea6\u7684\u5f71\u97ff\u3002 # \u6e2c\u8a66\u8cc7\u6599\u96c6 x_test = np . linspace ( - 0.1 , 1.1 , 500 )[:, None ] # \u7e6a\u88fd\u771f\u5be6\u7b54\u6848\u7684\u5206\u4f48 plt . scatter ( X . ravel (), y , color = 'black' ) # \u6e2c\u8a66 1,3,7 \u7684degree for degree in [ 1 , 3 , 9 ]: y_test = PolynomialRegression ( degree ) . fit ( X , y ) . predict ( x_test ) plt . plot ( x_test . ravel (), y_test , label = 'degree= {} ' . format ( degree )) plt . xlim ( - 0.1 , 1.0 ) plt . ylim ( - 2 , 12 ) plt . 
legend ( loc = 'best' ) \u5f9e\u8a13\u7df4\u7d50\u679c\u53ef\u4ee5\u767c\u73fe\u96a8\u8457\u6b21\u65b9\u6578 degree \u7684\u589e\u9577\u6a21\u578b\u6703\u8b8a\u5f97\u8d8a\u8907\u96dc\u3002\u540c\u6642\u5c0d\u65bc\u8a13\u7df4\u6578\u64da\u7684\u64ec\u5408\u7d50\u679c\u8d8a\u597d\u3002\u4f46\u662f\u9019\u88e1\u5fc5\u9808\u6ce8\u610f\u4e26\u975e\u8d8a\u5927\u7684 degree \u5c31\u662f\u8d8a\u597d\u7684\uff0c\u56e0\u70ba\u96a8\u8457\u6a21\u578b\u8907\u96dc\u6703\u6709\u904e\u5ea6\u64ec\u5408\u7684\u8de1\u8c61\u3002\u56e0\u6b64\u6211\u5011\u5fc5\u9808\u627e\u51fa\u4e00\u500b\u9069\u7576\u7684 degree \u6578\u503c\u4e26\u8207\u6e2c\u8a66\u96c6\u9a57\u8b49\u8207\u8a55\u4f30\u3002\u76ee\u6a19\u662f\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684 MSE \u5dee\u8ddd\u8981\u8d8a\u5c0f\u8d8a\u597d\u3002\u5982\u679c\u6211\u5011\u4e00\u6627\u7684\u8ffd\u6c42\u8a13\u7df4\u96c6\u7684\u640d\u5931\u6700\u5c0f\u5316\uff0c\u53ef\u80fd\u6703\u5f71\u97ff\u5230\u6e2c\u8a66\u96c6\u7684\u8868\u73fe\u80fd\u529b\u5c0e\u81f4\u9810\u6e2c\u7d50\u679c\u8b8a\u5dee\u3002","title":"Sklearn \u5be6\u4f5c\u591a\u9805\u5f0f\u8ff4\u6b78"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#gradient-descent","text":"\u63a5\u4e0b\u4f86\u6211\u5011\u4f86\u8a0e\u8ad6\u512a\u5316\u554f\u984c\u7684\u7b2c\u4e8c\u7a2e\u65b9\u6cd5\uff0c\u5c31\u662f\u68af\u5ea6\u4e0b\u964d\u6cd5\u3002\u68af\u5ea6\u4e0b\u964d\u4e0d\u50c5\u9650\u65bc\u7dda\u6027\u8ff4\u6b78\uff0c\u5728\u975e\u7dda\u6027\u548c\u795e\u7d93\u7db2\u7d61\u540c\u6a23\u9069\u7528\u3002\u4e0b\u5716\u4e2d\u6bcf\u4e00\u500b\u9ede\u662f\u8a13\u7df4\u96c6\u7684\u6a23\u672c x \u8ef8\u70ba\u8f38\u5165\u503c y \u8ef8\u70ba\u8f38\u51fa\u503c\u3002\u4e5f\u5c31\u662f\u5e73\u9762\u4e0a\u6bcf\u500b\u9ede x \u90fd\u6703\u6709\u4e00\u500b\u76f8\u5c0d\u61c9 y 
\u7684\u8f38\u51fa\uff0c\u56e0\u6b64\u6211\u5011\u9700\u8981\u505a\u7684\u4e8b\u60c5\u662f\u70ba\u9019\u4e9b\u9ede\u8a13\u7df4\u4e00\u500b\u6a21\u578b\uff0c\u4f7f\u5f97\u9019\u689d\u76f4\u7dda\u80fd\u5920\u76e1\u53ef\u80fd\u53cd\u6620\u51fa x \u8207 y \u4e4b\u9593\u7684\u95dc\u4fc2\u3002\u6b64\u5916\u6211\u5011\u90fd\u77e5\u9053\u5728\u4e00\u500b\u4e8c\u7dad\u7a7a\u9593\u4e2d\u6211\u5011\u80fd\u627e\u5230\u7121\u6578\u689d\u76f4\u7dda\uff0c\u90a3\u6211\u5011\u8a72\u5982\u4f55\u627e\u5230\u9019\u689d\u6700\u4f73\u7684\u76f4\u7dda\u5462\uff1f\u7c21\u55ae\u4f86\u8aaa\u6211\u5011\u7684\u76ee\u6a19\u662f\u8981\u4f7f\u5f97\u9019\u4e9b\u8a13\u7df4\u8cc7\u6599\u4e2d\u7684\u6bcf\u500b\u6a23\u672c\u9ede\u5230\u9019\u4e00\u689d\u76f4\u7dda\u7684\u8ddd\u96e2\u5e73\u65b9\u548c\u8981\u6700\u5c0f\u3002\u56e0\u6b64\u9019\u88e1\u6211\u5011\u5c07\u8a0e\u8ad6\u8a72\u5982\u4f55\u4f7f\u7528\u68af\u5ea6\u4e0b\u964d\u6cd5\u4f86\u6700\u4f73\u5316\u6211\u5011\u7684\u6a21\u578b\u3002\u9996\u5148\u6211\u5011\u5047\u8a2d\u4e00\u500b\u76f4\u7dda\u7684\u65b9\u7a0b\u5f0f\u662f y = \u03b2 0 + \u03b2 1 x\u3002\u90a3\u9996\u5148\u6211\u5011\u53ef\u4ee5\u5148\u96a8\u6a5f\u7684\u7d66\u4e88 \u03b2 0 \u548c \u03b2 1 \u4e00\u500b\u521d\u59cb\u503c\u3002\u4e26\u5f97\u5230\u4e0b\u5716\u4e2d\u7684\u7d50\u679c\uff0c\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u9019\u4e00\u689d\u76f4\u7dda\u4e26\u4e0d\u80fd\u53cd\u6620\u51fa x \u548c y \u7684\u95dc\u806f\u6027\u3002 \u5982\u679c\u6211\u5011\u4e0d\u65b7\u7684\u8fed\u4ee3\uff0c\u6bcf\u4e00\u6b21\u7684\u8fed\u4ee3\u90fd\u8b93\u9019\u4e00\u689d\u76f4\u7dda\u671d\u8457\u66f4\u7b26\u5408\u6578\u64da\u9ede\u7684\u65b9\u5411\u79fb\u52d5\u4e00\u9ede\uff0c\u90a3\u9ebc\u7d93\u904e\u8a31\u591a\u6b21\u7684\u66f4\u65b0\u6211\u5011\u5c31\u53ef\u4ee5\u5f97\u5230\u6700\u4f73\u7684\u7d50\u679c\u3002\u7c21\u55ae\u4f86\u8aaa\u5c31\u662f\u5728\u6bcf\u6b21\u7684\u8fed\u4ee3\u8981\u66f4\u65b0\u6240\u6709\u7684\u53c3\u6578\uff0c\u4f8b\u5982\uff1a \u03b2 0 \u548c \u03b2 1 
\uff0c\u76f4\u5230\u5f97\u5230\u6700\u5c0f\u7684 MSE \u6216\u662f\u9810\u5b9a\u7684\u8fed\u4ee3\u6b21\u6578\u3002\u4ee5\u4e0b\u7684\u516c\u5f0f\u5c31\u662f\u68af\u5ea6\u4e0b\u964d\u6cd5\u7684\u8868\u9054\u5f0f\u3002\u5b83\u53cd\u6620\u7684\u662f\u6bcf\u6b21\u8fed\u4ee3\uff0c\u6211\u5011\u7684 \u03b2 0 \u548c \u03b2 1 \u9019\u4e9b\u53c3\u6578\u662f\u5982\u4f55\u8abf\u6574\u7684\u3002\u6211\u5011\u53ef\u4ee5\u5f9e\u9019\u500b\u516c\u5f0f\u5f97\u77e5\uff0c\u4ed6\u662f\u5c0d\u640d\u5931\u51fd\u6578\u6c42\u4e86\u67d0\u4e00\u500b\u7279\u5b9a\u53c3\u6578\u7684\u504f\u5c0e\u3002\u9019\u5c31\u662f\u6240\u8b02\u7684\u68af\u5ea6\uff0c\u6211\u5011\u671d\u8457\u68af\u5ea6\u7684\u53cd\u65b9\u5411\u5728\u66f4\u65b0\u3002\u7136\u800c\u6bcf\u4e00\u6b21\u8981\u66f4\u65b0\u591a\u5927\u53ef\u4ee5\u4f9d\u9760 \u03b7\uff08(eta) \u4f86\u63a7\u5236\uff0c\u56e0\u6b64\u6211\u5011\u7b97\u51fa\u4f86\u7684\u68af\u5ea6\u9084\u6703\u4e58\u4e0a\u4e00\u500b\u5b78\u7fd2\u901f\u7387\u4f86\u9632\u6b62\u66f4\u65b0\u6b65\u4f10\u592a\u5927\u800c\u5c0e\u81f4\u627e\u4e0d\u5230\u89e3\u3002\u6240\u4ee5 \u03b7 \u7684\u5927\u5c0f\u8981\u9069\u4e2d\u4ee5\u514d\u5f71\u97ff\u5230\u6a21\u578b\u6700\u7d42\u7684\u6536\u6582\u3002 \u6b64\u5916\u9019\u500b\u6a21\u578b\u5982\u679c\u900f\u904e\u68af\u5ea6\u4e0b\u964d\u6cd5\u9084\u6709\u4e00\u500b\u7f3a\u9ede\uff0c\u90a3\u5c31\u662f\u7576\u6211\u5011\u7684\u640d\u5931\u51fd\u6578\u4e0d\u662f\u4e00\u500b\u51f8\u51fd\u6578(convex function) \u7684\u6642\u5019\u5b83\u5c31\u6703\u5b58\u5728\u8a31\u591a\u500b\u6700\u4f4e\u9ede\uff0c\u9032\u800c\u5c0e\u81f4\u5728\u6211\u5011\u9078\u64c7\u4e0d\u540c\u7684 \u03b2 0 \u548c \u03b2 1 \u4f5c\u70ba\u521d\u59cb\u503c\u7684\u6642\u5019\u5f88\u53ef\u80fd\u6703\u6536\u6582\u65bc\u4e0d\u540c\u7684\u5c40\u90e8\u6700\u4f73\u89e3(local 
optimum)\u3002\u4e5f\u5c31\u662f\u8aaa\u6211\u5011\u6c42\u5f97\u7684\u6700\u4f73\u7684\u6a21\u578b\u5f88\u6709\u6a5f\u6703\u662f\u5c40\u90e8\u6700\u4f73\u89e3\u800c\u4e0d\u662f\u5168\u5c40\u6700\u4f73\u89e3(global optimum)\u3002","title":"Gradient descent (\u68af\u5ea6\u4e0b\u964d\u6cd5)"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#sklearn-sgdregressor","text":"Sklearn \u63d0\u4f9b\u4e86 SGDRegressor \u4e26\u5be6\u73fe\u4e86\u96a8\u6a5f\u68af\u5ea6\u4e0b\u964d\u5b78\u7fd2\u3002\u4f60\u53ef\u80fd\u6703\u554f\u68af\u5ea6\u4e0b\u964d\u8207\u96a8\u6a5f\u68af\u5ea6\u4e0b\u964d\u5169\u8005\u5dee\u5225\u5728\u54ea\uff1f\u7c21\u55ae\u4f86\u8aaa\u4e00\u822c\u7684\u68af\u5ea6\u4e0b\u964d\u6cd5\u662f\u4e00\u6b21\u7528\u5168\u90e8\u8a13\u7df4\u96c6\u7684\u6578\u64da\u8a08\u7b97\u640d\u5931\u51fd\u6578\u7684\u68af\u5ea6\uff0c\u7136\u5f8c\u505a\u4e00\u6b21\u53c3\u6578\u7684\u66f4\u65b0\u4fee\u6b63\u3002\u800c\u96a8\u6a5f\u68af\u5ea6\u4e0b\u964d\u6cd5\u5c31\u662f\u4e00\u6b21\u8dd1\u4e00\u500b\u6a23\u672c\u6216\u662f\u5c0f\u6279\u6b21\u6a23\u672c\uff0c\u7136\u5f8c\u7b97\u51fa\u4e00\u6b21\u68af\u5ea6\u4e26\u66f4\u65b0\u3002\u800c\u6240\u8b02\u7684\u96a8\u6a5f\u5c31\u662f\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u96a8\u6a5f\u5730\u62bd\u53d6\u6a23\u672c\uff0c\u6240\u4ee5\u624d\u6703\u7a31\u70ba\u96a8\u6a5f\u68af\u5ea6\u4e0b\u964d\u6cd5\u3002 import numpy as np from sklearn.linear_model import SGDRegressor from sklearn.metrics import mean_squared_error # \u96a8\u6a5f\u7522\u751f\u4e00\u500b\u7279\u5fb5\u7684X\u8207\u8f38\u51fay X , y = make_data ( 100 ) # \u5efa\u7acb SGDRegressor \u4e26\u8a2d\u7f6e\u8d85\u53c3\u6578 regModel = SGDRegressor ( max_iter = 100 ) # \u8a13\u7df4\u6a21\u578b regModel . fit ( X , y ) # \u5efa\u7acb\u6e2c\u8a66\u8cc7\u6599 x_test = np . linspace ( - 0.05 , 1 , 500 )[:, None ] # \u9810\u6e2c\u6e2c\u8a66\u96c6 y_test = regModel . predict ( x_test ) # \u9810\u6e2c\u8a13\u7df4\u96c6 y_pred = regModel . predict ( X ) # \u8996\u89ba\u5316\u9810\u6e2c\u7d50\u679c plt . 
scatter ( X , y ) plt . plot ( x_test . ravel (), y_test , color = \"#d62728\" ) plt . xlabel ( 'x' ) plt . ylabel ( 'y' ) plt . text ( 0 , 10 , 'Loss(MSE)= %.3f ' % mean_squared_error ( y_pred , y ), fontdict = { 'size' : 15 , 'color' : 'red' }) plt . show () \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u4f7f\u7528 Sklearn SGDRegressor"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/","text":"[Day 9] \u908f\u8f2f\u8ff4\u6b78 (Logistic Regression) \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u8a8d\u8b58\u908f\u8f2f\u8ff4\u6b78 \u7dda\u6027\u5206\u985e\u5668 \u908f\u8f2f\u8ff4\u6b78\u5b78\u7fd2\u6a5f\u5236 \u6bd4\u8f03\u7dda\u6027\u8ff4\u6b78\u8207\u908f\u8f2f\u8ff4\u6b78 \u591a\u5143\u5206\u985e\u908f\u8f2f\u8ff4\u6b78 \u908f\u8f2f\u8ff4\u6b78\u7a0b\u5f0f\u624b\u628a\u624b \u4f7f\u7528\u908f\u8f2f\u8ff4\u6b78\u5efa\u7acb\u9cf6\u5c3e\u82b1\u6735\u5206\u985e\u5668 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u8a8d\u8b58\u908f\u8f2f\u8ff4\u6b78 \u908f\u8f2f\u8ff4\u6b78 (Logistic Regression) \u662f\u7531\u7dda\u6027\u8ff4\u6b78\u8b8a\u5316\u800c\u4f86\u7684\uff0c\u5b83\u662f\u4e00\u7a2e\u5206\u985e\u7684\u6a21\u578b\u3002\u5176\u76ee\u6a19\u662f\u8981\u627e\u51fa\u4e00\u689d\u76f4\u7dda\u80fd\u5920\u5c07\u6240\u6709\u6578\u64da\u6e05\u695a\u5730\u5206\u958b\u4e26\u505a\u5206\u985e\uff0c\u6211\u5011\u53c8\u53ef\u4ee5\u7a31\u8ff4\u6b78\u7684\u7dda\u6027\u5206\u985e\u5668\u3002\u908f\u8f2f\u8ff4\u6b78\u5176\u5be6\u662f\u5728\u8aaa\u660e\u4e00\u500b\u6a5f\u7387\u7684\u610f\u7fa9\uff0c\u900f\u904e\u4e00\u500b function \u53bb\u8a13\u7df4\u5f97\u5230\u7684\u4e00\u7d44\u53c3\u6578\uff0c\u4e0d\u540c\u7684 w,b \u5c31\u6703\u5f97\u5230\u4e0d\u540c\u7684 function\u3002\u65bc\u662f\u6211\u5011\u53ef\u4ee5\u8aaa f w,b (x) \u5373\u70ba posteriror probability\u3002 \u7dda\u6027\u8ff4\u6b78\u8207\u908f\u8f2f\u8ff4\u6b78 
\u908f\u8f2f\u8ff4\u6b78\u662f\u7528\u4f86\u8655\u7406\u5206\u985e\u554f\u984c\uff0c\u76ee\u6a19\u662f\u627e\u5230\u4e00\u689d\u76f4\u7dda\u53ef\u4ee5\u5c07\u8cc7\u6599\u505a\u5206\u985e\u3002\u4e3b\u8981\u662f\u5229\u7528 sigmoid function \u5c07\u8f38\u51fa\u8f49\u63db\u6210 0~1 \u7684\u503c\uff0c\u8868\u793a\u53ef\u80fd\u70ba\u9019\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002\u800c\u7dda\u6027\u8ff4\u6b78\u662f\u7528\u4f86\u9810\u6e2c\u4e00\u500b\u9023\u7e8c\u7684\u503c\uff0c\u76ee\u6a19\u662f\u60f3\u627e\u4e00\u689d\u76f4\u7dda\u53ef\u4ee5\u903c\u8fd1\u771f\u5be6\u7684\u8cc7\u6599\u3002 \u908f\u8f2f\u8ff4\u6b78\u5b78\u7fd2\u6a5f\u5236 \u908f\u8f2f\u8ff4\u6b78\u662f\u4e00\u500b\u6700\u57fa\u672c\u7684\u4e8c\u5143\u7dda\u6027\u5206\u985e\u5668\u3002\u6211\u5011\u8981\u627e\u4e00\u500b\u6a5f\u7387 (posterior probability) \u7576\u6a5f\u7387 P(C1|x) \u5927\u65bc 0.5 \u6642\u5247\u8f38\u51fa\u9810\u6e2c Class 1\uff0c\u53cd\u4e4b\u6a5f\u7387\u5c0f\u65bc 0.5 \u5247\u8f38\u51fa Class 2\u3002\u5982\u679c\u6211\u5011\u5047\u8a2d\u8cc7\u6599\u662f Gaussian \u6a5f\u7387\u5206\u4f48\uff0c\u6211\u5011\u53ef\u4ee5\u8aaa\u9019\u500b posterior probability \u5c31\u662f \ud835\udf0e(\ud835\udc67)\u3002\u5176\u4e2d z=w*x+b \uff0cx \u70ba\u8f38\u5165\u7279\u5fb5\uff0c\u800c w \u8207 b \u5206\u5225\u70ba\u6b0a\u91cd(weight)\u8207\u504f\u6b0a\u503c(bias) \u4ed6\u5011\u662f\u900f\u904e\u8a13\u7df4\u5f97\u5230\u7684\u4e00\u7d44\u53c3\u6578\u3002 \u4ee5\u4e0b\u5c31\u662f\u4e00\u500b\u908f\u8f2f\u8ff4\u6b78\u7684\u904b\u4f5c\u6a5f\u5236\uff0c\u5982\u679c\u4ee5\u5716\u50cf\u5316\u8868\u793a\u6703\u9577\u9019\u6a23\u3002\u6211\u5011\u7684 function \u6703\u6709\u5169\u7d44\u53c3\u6578\uff0c\u4e00\u7d44\u662f w \u6211\u5011\u7a31\u70ba weight\uff0c\u53e6\u4e00\u500b\u5e38\u6578 b \u7a31\u70ba bias\u3002\u5047\u8a2d\u6211\u5011\u6709\u5169\u500b\u8f38\u5165\u7279\u5fb5\uff0c\u4e26\u5c07\u9019\u5169\u500b\u8f38\u5165\u5206\u5225\u4e58\u4e0a w \u518d\u52a0\u4e0a b 
\u5c31\u53ef\u4ee5\u5f97\u5230 z\uff0c\u7136\u5f8c\u901a\u904e\u4e00\u500b sigmoid function \u5f97\u5230\u7684\u8f38\u51fa\u5c31\u662f posterior probability\u3002 \u5728\u908f\u8f2f\u8ff4\u6b78\u4e2d\u6211\u5011\u5b9a\u7fa9\u7684\u640d\u5931\u51fd\u6578\u662f\u8981\u53bb\u6700\u5c0f\u5316\u7684\u5c0d\u8c61\u662f\u6240\u6709\u8a13\u7df4\u8cc7\u6599 cross entropy \u7684\u7e3d\u548c\u3002\u6211\u5011\u5e0c\u671b\u6a21\u578b\u7684\u8f38\u51fa\u8981\u8ddf\u76ee\u6a19\u7b54\u6848\u8981\u8d8a\u63a5\u8fd1\u8d8a\u597d\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u5c07\u6700\u5c0f\u5316\u7684\u76ee\u6a19\u5beb\u6210\u4e00\u500b\u51fd\u6578\uff1a \u6700\u5f8c\u662f\u5c0b\u627e\u4e00\u7d44\u6700\u597d\u7684\u53c3\u6578\uff0c\u4f7f\u5f97 loss \u80fd\u5920\u6700\u4f4e\u3002\u56e0\u6b64\u9019\u88e1\u63a1\u7528\u68af\u5ea6\u4e0b\u964d (Gradient Descent) \u4f86\u6700\u5c0f\u5316\u4ea4\u53c9\u71b5 (Cross Entropy)\u3002\u6211\u5011\u5c07\u640d\u5931\u51fd\u6578\u5c0d\u6b0a\u91cd\u6c42\u504f\u5c0e\u5f8c\uff0c\u53ef\u4ee5\u5f97\u5230\u4e0b\u9762\u7684\u6b0a\u91cd\u66f4\u65b0\u7684\u5f0f\u5b50\uff1a \u591a\u5143\u5206\u985e\u908f\u8f2f\u8ff4\u6b78 (Multinomial Logistic Regression) \u5728 Sklearn \u4e2d\u4e5f\u80fd\u4f7f\u7528\u908f\u8f2f\u8ff4\u6b78\u5206\u985e\u5668\u61c9\u7528\u5728\u591a\u985e\u5225\u7684\u5206\u985e\u554f\u984c\u4e0a\uff0c\u5c0d\u65bc\u591a\u5143\u908f\u8f2f\u8ff4\u6b78\u6709 one-vs-rest(OvR) \u548c many-vs-many(MvM) \u5169\u7a2e\u65b9\u6cd5\u3002\u5169\u8005\u7684\u505a\u6cd5\u90fd\u662f\u5c07\u6240\u6709\u985e\u5225\u7684\u8cc7\u6599\u4f9d\u5e8f\u4f5c\u4e8c\u5143\u5206\u985e\u8a13\u7df4\u3002MvM \u76f8\u8f03\u65bc OvR \u6bd4\u8f03\u7cbe\u6e96\uff0c\u4f46 liblinear \u53ea\u652f\u63f4 OvR\u3002 one-vs-rest(OvR): \u8a13\u7df4\u6642\u628a\u67d0\u500b\u985e\u5225\u7684\u8cc7\u6599\u6b78\u70ba\u4e00\u985e\uff0c\u5176\u4ed6\u5269\u9918\u7684\u8cc7\u6599\u6b78\u70ba\u53e6\u4e00\u985e\u505a\u908f\u8f2f\u8ff4\u6b78\uff0c\u56e0\u6b64\u82e5\u6709 k 
\u500b\u985e\u5225\u7684\u8cc7\u6599\u6703\u6709 k \u500b\u4e8c\u5143\u5206\u985e\u5668\u3002\u5047\u5982\u6709\u4e09\u500b\u985e\u5225 A\u3001B\u3001C\uff0c\u9996\u5148\u62bd\u53d6 A \u985e\u5225\u7684\u8cc7\u6599\u505a\u70ba\u6b63\u96c6\uff0cB\u3001C \u985e\u5225\u8cc7\u6599\u505a\u70ba\u8ca0\u96c6; B \u985e\u5225\u7684\u8cc7\u6599\u4f5c\u70ba\u6b63\u96c6\uff0cA\u3001C \u985e\u5225\u985e\u5225\u8cc7\u6599\u505a\u70ba\u8ca0\u96c6; C \u985e\u5225\u7684\u8cc7\u6599\u4f5c\u70ba\u6b63\u96c6\uff0cA\u3001B \u985e\u5225\u985e\u5225\u8cc7\u6599\u505a\u70ba\u8ca0\u96c6\u3002\u900f\u904e\u9019\u4e09\u7d44\u8a13\u7df4\u96c6\u5206\u5225\u9032\u884c\u8a13\u7df4\uff0c\u7136\u5f8c\u7684\u5f97\u5230\u4e09\u500b\u5206\u985e\u5668 f1(x)\u3001f2(x)\u3001f3(x)\u3002\u9810\u6e2c\u7684\u6642\u5019\u5c31\u662f\u628a\u8cc7\u6599\u4e1f\u9032\u4e09\u500b\u5206\u985e\u5668\uff0c\u67e5\u770b\u54ea\u500b\u5206\u985e\u5668\u9810\u6e2c\u7684\u5206\u6578\u6700\u9ad8\u5c31\u6c7a\u5b9a\u8a72\u985e\u5225\u3002 many-vs-many(MvM): \u8207 OvR \u5dee\u5225\u5728\u65bc\u8a13\u7df4\u6642\u6bcf\u6b21\u53ea\u6703\u6311\u5169\u500b\u985e\u5225\u8a13\u7df4\u4e00\u500b\u5206\u985e\u5668\uff0c\u56e0\u6b64 k \u500b\u985e\u5225\u7684\u8cc7\u6599\u5c31\u9700\u8981 k(k-1)/2 \u500b\u4e8c\u5143\u5206\u985e\u5668\u3002\u5047\u5982\u6709\u4e09\u500b\u985e\u5225 A\u3001B\u3001C\uff0c\u56e0\u6b64\u6211\u5011\u6703\u6709\u4e09\u7d44\u4e8c\u5143\u5206\u985e\u5668\u5206\u5225\u6709 (A\u3001B)\u3001(A\u3001C) \u8207 (B\u3001C)\u3002\u8a13\u7df4\u5b8c\u6210\u5f8c\u7576\u6709\u65b0\u8cc7\u6599\u8981\u9810\u6e2c\u6642\uff0c\u628a\u8cc7\u6599\u5206\u5225\u5c0d\u4e09\u500b\u4e8c\u5143\u5206\u985e\u5668\u9032\u884c\u9810\u6e2c\uff0c\u6700\u7d42\u591a\u6578\u6c7a\u7684\u65b9\u5f0f\u5f97\u5230\u9810\u6e2c\u7d50\u679c\u3002 [\u7a0b\u5f0f\u5be6\u4f5c] \u908f\u8f2f\u8ff4\u6b78 (\u5206\u985e\u5668) 
\u908f\u8f2f\u8ff4\u6b78\u96d6\u7136\u6709\u8ff4\u6b78\u5169\u5b57\u4f46\u4ed6\u5176\u5be6\u662f\u88ab\u7528\u4f86\u505a\u5206\u985e\u7684\uff0c\u76ee\u7684\u662f\u8981\u627e\u51fa\u4e00\u689d\u76f4\u7dda\u80fd\u5920\u5c07\u5169\u500b\u985e\u5225\u5206\u958b\u3002\b\u672c\u7bc4\u4f8b\u63a1\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u505a\u5206\u985e\u5668\u5be6\u9a57\uff0c\u5e0c\u671b\u80fd\u5920\u900f\u904e\u7dda\u6027\u5206\u985e\u5668\u5c07\u4e09\u500b\u985e\u5225\u5f7c\u6b64\u5340\u9694\u958b\u3002 Parameters: - penalty: \u6b63\u898f\u5316l1/l2\uff0c\u9632\u6b62\u6a21\u578b\u904e\u5ea6\u64ec\u5408\u3002 - C: \u6578\u503c\u8d8a\u5927\u5c0d weight \u7684\u63a7\u5236\u529b\u8d8a\u5f31\uff0c\u9810\u8a2d\u70ba1\u3002 - n_init: \u9810\u8a2d\u70ba10\u6b21\u96a8\u6a5f\u521d\u59cb\u5316\uff0c\u9078\u64c7\u6548\u679c\u6700\u597d\u7684\u4e00\u7a2e\u4f86\u4f5c\u70ba\u6a21\u578b\u3002 - solver: \u512a\u5316\u5668\u7684\u9078\u64c7\u3002newton-cg,lbfgs,liblinear,sag,saga\u3002\u9810\u8a2d\u70baliblinear\u3002 - multi_class: \u9078\u64c7\u5206\u985e\u65b9\u5f0f\uff0covr\u5c31\u662fone-vs-rest(OvR)\uff0c\u800cmultinomial\u5c31\u662fmany-vs-many(MvM)\u3002\u9810\u8a2d\u70ba auto\uff0c\u6545\u6a21\u578b\u8a13\u7df4\u4e2d\u6703\u53d6\u4e00\u500b\u6700\u597d\u7684\u7d50\u679c\u3002 - max_iter: \u8fed\u4ee3\u6b21\u6578\uff0c\u9810\u8a2d\u70ba100\u4ee3\u3002 - class_weight: \u82e5\u9047\u8cc7\u6599\u4e0d\u5e73\u8861\u554f\u984c\u53ef\u4ee5\u8a2d\u5b9abalance\uff0c\u9810\u8a2d=None\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\u50c5\u5728solver=sag/liblinear\u6642\u6709\u7528\u3002 Attributes: - coef_: \u53d6\u5f97\u659c\u7387\u3002 - intercept_: \u53d6\u5f97\u622a\u8ddd\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 from 
sklearn.linear_model import LogisticRegression # \u5efa\u7acbLogistic\u6a21\u578b logisticModel = LogisticRegression ( random_state = 0 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b logisticModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = logisticModel . predict ( X_train ) \u4f7f\u7528Score\u8a55\u4f30\u6a21\u578b \u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb score() \u76f4\u63a5\u8a08\u7b97\u6a21\u578b\u9810\u6e2c\u7684\u6e96\u78ba\u7387\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( '\u8a13\u7df4\u96c6: ' , logisticModel . score ( X_train , y_train )) print ( '\u6e2c\u8a66\u96c6: ' , logisticModel . score ( X_test , y_test )) \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6: 0.9714285714285714 \u6e2c\u8a66\u96c6: 0.9333333333333333 \u900f\u904e Sklearn \u7684 LogisticRegression \u53ef\u4ee5\u5be6\u4f5c\u4e00\u500b\u5178\u578b\u7684\u4e8c\u5143\u5206\u985e\u5668\u3002\u4e0d\u904e\u7576\u6709\u591a\u500b\u985e\u5225\u7684\u6642\u5019\uff0c\u6211\u5011\u53ef\u4ee5\u900f\u904e\u53c3\u6578 multi_class \u4f86\u8a2d\u5b9a\u591a\u5143\u5206\u985e\u5668\u7684\u5b78\u7fd2\u6a5f\u5236\u3002\u6211\u5011\u53ef\u4ee5\u89c0\u5bdf\u4e00\u4e0b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5728\u6e2c\u8a66\u96c6\u4e0a\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u70ba\u4e86\u65b9\u4fbf\u89c0\u5bdf\u8a13\u7df4\u7d50\u679c\uff0c\u56e0\u6b64\u6211\u5011\u53ea\u6311\u9078\u5176\u4e2d\u5169\u500b\u7279\u5fb5\u4e26\u7e6a\u88fd\u5e73\u9762\u7684\u9ede\u6563\u5716\u3002\u4e0b\u5716\u4e2d\u5de6\u908a\u7684\u662f\u6e2c\u8a66\u96c6\u7684\u771f\u5be6\u5206\u985e\uff0c\u53f3\u908a\u7684\u662f\u6a21\u578b\u9810\u6e2c\u7684\u5206\u985e\u7d50\u679c\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 9] \u908f\u8f2f\u8ff4\u6b78 (Logistic 
Regression)"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/#day-9-logistic-regression","text":"","title":"[Day 9] \u908f\u8f2f\u8ff4\u6b78 (Logistic Regression)"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/#_1","text":"\u8a8d\u8b58\u908f\u8f2f\u8ff4\u6b78 \u7dda\u6027\u5206\u985e\u5668 \u908f\u8f2f\u8ff4\u6b78\u5b78\u7fd2\u6a5f\u5236 \u6bd4\u8f03\u7dda\u6027\u8ff4\u6b78\u8207\u908f\u8f2f\u8ff4\u6b78 \u591a\u5143\u5206\u985e\u908f\u8f2f\u8ff4\u6b78 \u908f\u8f2f\u8ff4\u6b78\u7a0b\u5f0f\u624b\u628a\u624b \u4f7f\u7528\u908f\u8f2f\u8ff4\u6b78\u5efa\u7acb\u9cf6\u5c3e\u82b1\u6735\u5206\u985e\u5668 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/#_2","text":"\u908f\u8f2f\u8ff4\u6b78 (Logistic Regression) \u662f\u7531\u7dda\u6027\u8ff4\u6b78\u8b8a\u5316\u800c\u4f86\u7684\uff0c\u5b83\u662f\u4e00\u7a2e\u5206\u985e\u7684\u6a21\u578b\u3002\u5176\u76ee\u6a19\u662f\u8981\u627e\u51fa\u4e00\u689d\u76f4\u7dda\u80fd\u5920\u5c07\u6240\u6709\u6578\u64da\u6e05\u695a\u5730\u5206\u958b\u4e26\u505a\u5206\u985e\uff0c\u6211\u5011\u53c8\u53ef\u4ee5\u7a31\u8ff4\u6b78\u7684\u7dda\u6027\u5206\u985e\u5668\u3002\u908f\u8f2f\u8ff4\u6b78\u5176\u5be6\u662f\u5728\u8aaa\u660e\u4e00\u500b\u6a5f\u7387\u7684\u610f\u7fa9\uff0c\u900f\u904e\u4e00\u500b function \u53bb\u8a13\u7df4\u5f97\u5230\u7684\u4e00\u7d44\u53c3\u6578\uff0c\u4e0d\u540c\u7684 w,b \u5c31\u6703\u5f97\u5230\u4e0d\u540c\u7684 function\u3002\u65bc\u662f\u6211\u5011\u53ef\u4ee5\u8aaa f w,b (x) \u5373\u70ba posteriror probability\u3002","title":"\u8a8d\u8b58\u908f\u8f2f\u8ff4\u6b78"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/#_3","text":"\u908f\u8f2f\u8ff4\u6b78\u662f\u7528\u4f86\u8655\u7406\u5206\u985e\u554f\u984c\uff0c\u76ee\u6a19\u662f\u627e\u5230\u4e00\u689d\u76f4\u7dda\u53ef\u4ee5\u5c07\u8cc7\u6599\u505a\u5206\u985e\u3002\u4e3b\u8981\u662f\u5229\u7528 sigmoid function \u5c07\u8f38\u51fa\u8f49\u63db\u6210 0~1 
\u7684\u503c\uff0c\u8868\u793a\u53ef\u80fd\u70ba\u9019\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002\u800c\u7dda\u6027\u8ff4\u6b78\u662f\u7528\u4f86\u9810\u6e2c\u4e00\u500b\u9023\u7e8c\u7684\u503c\uff0c\u76ee\u6a19\u662f\u60f3\u627e\u4e00\u689d\u76f4\u7dda\u53ef\u4ee5\u903c\u8fd1\u771f\u5be6\u7684\u8cc7\u6599\u3002","title":"\u7dda\u6027\u8ff4\u6b78\u8207\u908f\u8f2f\u8ff4\u6b78"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/#_4","text":"\u908f\u8f2f\u8ff4\u6b78\u662f\u4e00\u500b\u6700\u57fa\u672c\u7684\u4e8c\u5143\u7dda\u6027\u5206\u985e\u5668\u3002\u6211\u5011\u8981\u627e\u4e00\u500b\u6a5f\u7387 (posterior probability) \u7576\u6a5f\u7387 P(C1|x) \u5927\u65bc 0.5 \u6642\u5247\u8f38\u51fa\u9810\u6e2c Class 1\uff0c\u53cd\u4e4b\u6a5f\u7387\u5c0f\u65bc 0.5 \u5247\u8f38\u51fa Class 2\u3002\u5982\u679c\u6211\u5011\u5047\u8a2d\u8cc7\u6599\u662f Gaussian \u6a5f\u7387\u5206\u4f48\uff0c\u6211\u5011\u53ef\u4ee5\u8aaa\u9019\u500b posterior probability \u5c31\u662f \ud835\udf0e(\ud835\udc67)\u3002\u5176\u4e2d z=w*x+b \uff0cx \u70ba\u8f38\u5165\u7279\u5fb5\uff0c\u800c w \u8207 b \u5206\u5225\u70ba\u6b0a\u91cd(weight)\u8207\u504f\u6b0a\u503c(bias) \u4ed6\u5011\u662f\u900f\u904e\u8a13\u7df4\u5f97\u5230\u7684\u4e00\u7d44\u53c3\u6578\u3002 \u4ee5\u4e0b\u5c31\u662f\u4e00\u500b\u908f\u8f2f\u8ff4\u6b78\u7684\u904b\u4f5c\u6a5f\u5236\uff0c\u5982\u679c\u4ee5\u5716\u50cf\u5316\u8868\u793a\u6703\u9577\u9019\u6a23\u3002\u6211\u5011\u7684 function \u6703\u6709\u5169\u7d44\u53c3\u6578\uff0c\u4e00\u7d44\u662f w \u6211\u5011\u7a31\u70ba weight\uff0c\u53e6\u4e00\u500b\u5e38\u6578 b \u7a31\u70ba bias\u3002\u5047\u8a2d\u6211\u5011\u6709\u5169\u500b\u8f38\u5165\u7279\u5fb5\uff0c\u4e26\u5c07\u9019\u5169\u500b\u8f38\u5165\u5206\u5225\u4e58\u4e0a w \u518d\u52a0\u4e0a b \u5c31\u53ef\u4ee5\u5f97\u5230 z\uff0c\u7136\u5f8c\u901a\u904e\u4e00\u500b sigmoid function \u5f97\u5230\u7684\u8f38\u51fa\u5c31\u662f posterior probability\u3002 
\u5728\u908f\u8f2f\u8ff4\u6b78\u4e2d\u6211\u5011\u5b9a\u7fa9\u7684\u640d\u5931\u51fd\u6578\u662f\u8981\u53bb\u6700\u5c0f\u5316\u7684\u5c0d\u8c61\u662f\u6240\u6709\u8a13\u7df4\u8cc7\u6599 cross entropy \u7684\u7e3d\u548c\u3002\u6211\u5011\u5e0c\u671b\u6a21\u578b\u7684\u8f38\u51fa\u8981\u8ddf\u76ee\u6a19\u7b54\u6848\u8981\u8d8a\u63a5\u8fd1\u8d8a\u597d\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u5c07\u6700\u5c0f\u5316\u7684\u76ee\u6a19\u5beb\u6210\u4e00\u500b\u51fd\u6578\uff1a \u6700\u5f8c\u662f\u5c0b\u627e\u4e00\u7d44\u6700\u597d\u7684\u53c3\u6578\uff0c\u4f7f\u5f97 loss \u80fd\u5920\u6700\u4f4e\u3002\u56e0\u6b64\u9019\u88e1\u63a1\u7528\u68af\u5ea6\u4e0b\u964d (Gradient Descent) \u4f86\u6700\u5c0f\u5316\u4ea4\u53c9\u71b5 (Cross Entropy)\u3002\u6211\u5011\u5c07\u640d\u5931\u51fd\u6578\u5c0d\u6b0a\u91cd\u6c42\u504f\u5c0e\u5f8c\uff0c\u53ef\u4ee5\u5f97\u5230\u4e0b\u9762\u7684\u6b0a\u91cd\u66f4\u65b0\u7684\u5f0f\u5b50\uff1a","title":"\u908f\u8f2f\u8ff4\u6b78\u5b78\u7fd2\u6a5f\u5236"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/#multinomial-logistic-regression","text":"\u5728 Sklearn \u4e2d\u4e5f\u80fd\u4f7f\u7528\u908f\u8f2f\u8ff4\u6b78\u5206\u985e\u5668\u61c9\u7528\u5728\u591a\u985e\u5225\u7684\u5206\u985e\u554f\u984c\u4e0a\uff0c\u5c0d\u65bc\u591a\u5143\u908f\u8f2f\u8ff4\u6b78\u6709 one-vs-rest(OvR) \u548c many-vs-many(MvM) \u5169\u7a2e\u65b9\u6cd5\u3002\u5169\u8005\u7684\u505a\u6cd5\u90fd\u662f\u5c07\u6240\u6709\u985e\u5225\u7684\u8cc7\u6599\u4f9d\u5e8f\u4f5c\u4e8c\u5143\u5206\u985e\u8a13\u7df4\u3002MvM \u76f8\u8f03\u65bc OvR \u6bd4\u8f03\u7cbe\u6e96\uff0c\u4f46 liblinear \u53ea\u652f\u63f4 OvR\u3002 one-vs-rest(OvR): \u8a13\u7df4\u6642\u628a\u67d0\u500b\u985e\u5225\u7684\u8cc7\u6599\u6b78\u70ba\u4e00\u985e\uff0c\u5176\u4ed6\u5269\u9918\u7684\u8cc7\u6599\u6b78\u70ba\u53e6\u4e00\u985e\u505a\u908f\u8f2f\u8ff4\u6b78\uff0c\u56e0\u6b64\u82e5\u6709 k \u500b\u985e\u5225\u7684\u8cc7\u6599\u6703\u6709 k 
\u500b\u4e8c\u5143\u5206\u985e\u5668\u3002\u5047\u5982\u6709\u4e09\u500b\u985e\u5225 A\u3001B\u3001C\uff0c\u9996\u5148\u62bd\u53d6 A \u985e\u5225\u7684\u8cc7\u6599\u505a\u70ba\u6b63\u96c6\uff0cB\u3001C \u985e\u5225\u8cc7\u6599\u505a\u70ba\u8ca0\u96c6; B \u985e\u5225\u7684\u8cc7\u6599\u4f5c\u70ba\u6b63\u96c6\uff0cA\u3001C \u985e\u5225\u985e\u5225\u8cc7\u6599\u505a\u70ba\u8ca0\u96c6; C \u985e\u5225\u7684\u8cc7\u6599\u4f5c\u70ba\u6b63\u96c6\uff0cA\u3001B \u985e\u5225\u985e\u5225\u8cc7\u6599\u505a\u70ba\u8ca0\u96c6\u3002\u900f\u904e\u9019\u4e09\u7d44\u8a13\u7df4\u96c6\u5206\u5225\u9032\u884c\u8a13\u7df4\uff0c\u7136\u5f8c\u7684\u5f97\u5230\u4e09\u500b\u5206\u985e\u5668 f1(x)\u3001f2(x)\u3001f3(x)\u3002\u9810\u6e2c\u7684\u6642\u5019\u5c31\u662f\u628a\u8cc7\u6599\u4e1f\u9032\u4e09\u500b\u5206\u985e\u5668\uff0c\u67e5\u770b\u54ea\u500b\u5206\u985e\u5668\u9810\u6e2c\u7684\u5206\u6578\u6700\u9ad8\u5c31\u6c7a\u5b9a\u8a72\u985e\u5225\u3002 many-vs-many(MvM): \u8207 OvR \u5dee\u5225\u5728\u65bc\u8a13\u7df4\u6642\u6bcf\u6b21\u53ea\u6703\u6311\u5169\u500b\u985e\u5225\u8a13\u7df4\u4e00\u500b\u5206\u985e\u5668\uff0c\u56e0\u6b64 k \u500b\u985e\u5225\u7684\u8cc7\u6599\u5c31\u9700\u8981 k(k-1)/2 \u500b\u4e8c\u5143\u5206\u985e\u5668\u3002\u5047\u5982\u6709\u4e09\u500b\u985e\u5225 A\u3001B\u3001C\uff0c\u56e0\u6b64\u6211\u5011\u6703\u6709\u4e09\u7d44\u4e8c\u5143\u5206\u985e\u5668\u5206\u5225\u6709 (A\u3001B)\u3001(A\u3001C) \u8207 (B\u3001C)\u3002\u8a13\u7df4\u5b8c\u6210\u5f8c\u7576\u6709\u65b0\u8cc7\u6599\u8981\u9810\u6e2c\u6642\uff0c\u628a\u8cc7\u6599\u5206\u5225\u5c0d\u4e09\u500b\u4e8c\u5143\u5206\u985e\u5668\u9032\u884c\u9810\u6e2c\uff0c\u6700\u7d42\u591a\u6578\u6c7a\u7684\u65b9\u5f0f\u5f97\u5230\u9810\u6e2c\u7d50\u679c\u3002","title":"\u591a\u5143\u5206\u985e\u908f\u8f2f\u8ff4\u6b78 (Multinomial Logistic 
Regression)"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/#_5","text":"","title":"[\u7a0b\u5f0f\u5be6\u4f5c]"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/#_6","text":"\u908f\u8f2f\u8ff4\u6b78\u96d6\u7136\u6709\u8ff4\u6b78\u5169\u5b57\u4f46\u4ed6\u5176\u5be6\u662f\u88ab\u7528\u4f86\u505a\u5206\u985e\u7684\uff0c\u76ee\u7684\u662f\u8981\u627e\u51fa\u4e00\u689d\u76f4\u7dda\u80fd\u5920\u5c07\u5169\u500b\u985e\u5225\u5206\u958b\u3002\b\u672c\u7bc4\u4f8b\u63a1\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u505a\u5206\u985e\u5668\u5be6\u9a57\uff0c\u5e0c\u671b\u80fd\u5920\u900f\u904e\u7dda\u6027\u5206\u985e\u5668\u5c07\u4e09\u500b\u985e\u5225\u5f7c\u6b64\u5340\u9694\u958b\u3002 Parameters: - penalty: \u6b63\u898f\u5316l1/l2\uff0c\u9632\u6b62\u6a21\u578b\u904e\u5ea6\u64ec\u5408\u3002 - C: \u6578\u503c\u8d8a\u5927\u5c0d weight \u7684\u63a7\u5236\u529b\u8d8a\u5f31\uff0c\u9810\u8a2d\u70ba1\u3002 - n_init: \u9810\u8a2d\u70ba10\u6b21\u96a8\u6a5f\u521d\u59cb\u5316\uff0c\u9078\u64c7\u6548\u679c\u6700\u597d\u7684\u4e00\u7a2e\u4f86\u4f5c\u70ba\u6a21\u578b\u3002 - solver: \u512a\u5316\u5668\u7684\u9078\u64c7\u3002newton-cg,lbfgs,liblinear,sag,saga\u3002\u9810\u8a2d\u70baliblinear\u3002 - multi_class: \u9078\u64c7\u5206\u985e\u65b9\u5f0f\uff0covr\u5c31\u662fone-vs-rest(OvR)\uff0c\u800cmultinomial\u5c31\u662fmany-vs-many(MvM)\u3002\u9810\u8a2d\u70ba auto\uff0c\u6545\u6a21\u578b\u8a13\u7df4\u4e2d\u6703\u53d6\u4e00\u500b\u6700\u597d\u7684\u7d50\u679c\u3002 - max_iter: \u8fed\u4ee3\u6b21\u6578\uff0c\u9810\u8a2d\u70ba100\u4ee3\u3002 - class_weight: \u82e5\u9047\u8cc7\u6599\u4e0d\u5e73\u8861\u554f\u984c\u53ef\u4ee5\u8a2d\u5b9abalance\uff0c\u9810\u8a2d=None\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\u50c5\u5728solver=sag/liblinear\u6642\u6709\u7528\u3002 Attributes: - coef_: \u53d6\u5f97\u659c\u7387\u3002 - intercept_: \u53d6\u5f97\u622a\u8ddd\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: 
\u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 from sklearn.linear_model import LogisticRegression # \u5efa\u7acbLogistic\u6a21\u578b logisticModel = LogisticRegression ( random_state = 0 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b logisticModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = logisticModel . predict ( X_train )","title":"\u908f\u8f2f\u8ff4\u6b78 (\u5206\u985e\u5668)"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/#score","text":"\u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb score() \u76f4\u63a5\u8a08\u7b97\u6a21\u578b\u9810\u6e2c\u7684\u6e96\u78ba\u7387\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( '\u8a13\u7df4\u96c6: ' , logisticModel . score ( X_train , y_train )) print ( '\u6e2c\u8a66\u96c6: ' , logisticModel . score ( X_test , y_test )) \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6: 0.9714285714285714 \u6e2c\u8a66\u96c6: 0.9333333333333333 \u900f\u904e Sklearn \u7684 LogisticRegression \u53ef\u4ee5\u5be6\u4f5c\u4e00\u500b\u5178\u578b\u7684\u4e8c\u5143\u5206\u985e\u5668\u3002\u4e0d\u904e\u7576\u6709\u591a\u500b\u985e\u5225\u7684\u6642\u5019\uff0c\u6211\u5011\u53ef\u4ee5\u900f\u904e\u53c3\u6578 multi_class 
\u4f86\u8a2d\u5b9a\u591a\u5143\u5206\u985e\u5668\u7684\u5b78\u7fd2\u6a5f\u5236\u3002\u6211\u5011\u53ef\u4ee5\u89c0\u5bdf\u4e00\u4e0b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5728\u6e2c\u8a66\u96c6\u4e0a\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u70ba\u4e86\u65b9\u4fbf\u89c0\u5bdf\u8a13\u7df4\u7d50\u679c\uff0c\u56e0\u6b64\u6211\u5011\u53ea\u6311\u9078\u5176\u4e2d\u5169\u500b\u7279\u5fb5\u4e26\u7e6a\u88fd\u5e73\u9762\u7684\u9ede\u6563\u5716\u3002\u4e0b\u5716\u4e2d\u5de6\u908a\u7684\u662f\u6e2c\u8a66\u96c6\u7684\u771f\u5be6\u5206\u985e\uff0c\u53f3\u908a\u7684\u662f\u6a21\u578b\u9810\u6e2c\u7684\u5206\u985e\u7d50\u679c\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u4f7f\u7528Score\u8a55\u4f30\u6a21\u578b"}]} \ No newline at end of file +{"config":{"lang":["en"],"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"\u5168\u6c11\u760bAI\u7cfb\u52172.0 \u7b2c13\u5c46iT\u90a6\u5e6b\u5fd9\u9435\u4eba\u8cfd \u524d\u8a00 \u54c8\u56c9\u5927\u5bb6\u597d\u6211\u662f10\u7a0b\u5f0f\u4e2d\u768410\uff01\u6211\u662f \u4e0a\u4e00\u5c46 \u9435\u4eba\u8cfd\u5f71\u7247\u6559\u5b78\u7d44 \u5168\u6c11\u760bAI\u7cfb\u5217 
\u7684\u4f5c\u8005\uff0c\u7576\u6642\u8b1b\u89e3\u4e86\u4eba\u5de5\u667a\u6167\u7684\u57fa\u790e\u4ee5\u53ca\u5e38\u898b\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u8207\u624b\u628a\u624b\u6559\u5b78\u3002\u7531\u65bc\u5927\u5bb6\u53cd\u61c9\u5f88\u71b1\u70c8\uff0c\u8b93\u6211\u770b\u5230\u4e86\u5927\u5bb6\u5c0d\u65bcAI\u7684\u5b78\u7fd2\u71b1\u5ff1\u3002\u4e5f\u56e0\u70ba\u4e0a\u4e00\u5c46\u7372\u5f97\u4e86\u5f71\u7247\u6559\u5b78\u7d44\u512a\u9078\uff0c\u6536\u5230\u4e86\u8a31\u591a\u66f8\u5546\u7684\u51fa\u7248\u9080\u8acb\uff0c\u7531\u65bc\u6211\u6c92\u6709\u6642\u9593\u8207\u52d5\u529b\u5c07\u9019\u4e9b\u5927\u91cf\u77e5\u8b58\u5beb\u6210\u6587\u7ae0\u56e0\u6b64\u90fd\u5a49\u62d2\u4e86\u3002\u56e0\u6b64\u6211\u60f3\u85c9\u7531\u9019\u4e00\u6b21\u9435\u4eba\u8cfd\u5c07\u4e0a\u4e00\u5c46\u7684\u5f71\u7247\u5167\u5bb9\u6574\u7406\u6210\u96fb\u5b50\u66f8\u7248\u672c\uff0c\u63d0\u4f9b\u5927\u5bb6\u5f71\u7247\u6559\u5b78\u8207\u6587\u5b57\u7248\u7684\u7b46\u8a18\u5167\u5bb9(\u5537\u547c\u66f8\u5546\u5feb\u770b\u904e\u4f86\uff5e)\u7576\u7136\u5167\u5bb9\u6703\u4ee5\u4e4b\u524d\u5f71\u7247\u6559\u5b78\u70ba\u57fa\u5e95\uff0c\u4e26\u52a0\u5165\u4e00\u4e9b\u65b0\u7684\u5143\u7d20\u8b93\u6587\u7ae0\u5167\u5bb9\u8b8a\u5f97\u66f4\u7d2e\u5be6\u3002\u5728\u5168\u65b0\u7684 \u5168\u6c11\u760bAI\u7cfb\u52172.0 \u4e2d\u6211\u6703\u4ecb\u7d39\u5be6\u7528\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4e26\u542b\u6709\u7a0b\u5f0f\u624b\u628a\u624b\u5be6\u4f5c\uff0c\u4ee5\u53ca\u8fd1\u5e74\u4f86\u71b1\u9580\u7684\u6a5f\u5668\u5b78\u7fd2\u5957\u4ef6\u8207\u6a21\u578b\u8abf\u53c3\u6280\u5de7\u3002\u9664\u6b64\u4e4b\u5916\u6211\u9084\u6703\u63d0\u5230\u5927\u5bb6\u6700\u611f\u8208\u8da3\u7684 AI \u6a21\u578b\u843d\u5730\u8207\u6574\u5408\u3002\u5e0c\u671b\u5728\u9019\u6b21\u7684\u9435\u4eba\u8cfd\u80fd\u5920\u5c07AI\u7684\u8cc7\u6e90\u6574\u7406\u5f97\u66f4\u8a73\u7d30\u4e26\u5206\u4eab\u7d66\u5404\u4f4d\u3002 \u6b64\u7cfb\u5217\u6559\u5b78\u9069\u5408\u8ab0? 
\u5982\u679c\u60a8\u662f\u4e4b\u524d\u7684\u820a\u8b80\u8005\uff0c\u6b61\u8fce\u56de\u4f86\u70ba\u81ea\u5df1\u5145\u96fb\uff5e\u65b0\u7684\u7cfb\u5217\u6587\u7ae0\u4fdd\u8b49\u8b93\u4f60\u6536\u7a6b\u6eff\u6eff\uff01\u82e5\u60a8\u662f\u65b0\u4f86\u7684\u8b80\u8005\u6b61\u8fce\u52a0\u5165\u4eba\u5de5\u667a\u6167\u7684\u4e16\u754c\uff0c\u6b64\u7cfb\u5217\u6587\u7ae0\u6b63\u9069\u5408\u521d\u5b78\u8005\u95b1\u8b80\u3002\u53e6\u5916\u5efa\u8b70\u53ef\u4ee5\u642d\u914d\u6211 \u4e0a\u4e00\u5c46 \u9435\u4eba\u8cfd\u7684\u5f71\u7247\u6559\u5b78\u9032\u884c\u5b78\u7fd2\u3002 \u7cfb\u5217\u6587\u7ae0\u5167\u5bb9\u898f\u5283 \u5728\u672c\u6b21\u9435\u4eba\u8cfd\u9810\u8a08\u65b0\u589e\u4e86\u8a31\u591a\u65b0\u5167\u5bb9\uff0c\u7279\u5225\u662f\u8fd1\u5e74\u4f86\u6bd4\u8f03\u65b0\u7684\u6f14\u7b97\u6cd5\u5957\u4ef6\uff0c\u4ee5\u53ca\u5728\u6a21\u578b\u8a13\u7df4\u4e2d\u5fc5\u9808\u6ce8\u610f\u7684\u5927\u5c0f\u4e8b\u3002\u672c\u7cfb\u5217\u8981\u5728\u77ed\u77ed30\u5929\u5167\u8b1b\u5b8c\u6240\u6709 AI \u9818\u57df\u76f8\u95dc\u61c9\u7528\u662f\u4e0d\u592a\u53ef\u80fd\u7684\u4e8b\u60c5\uff0c\u56e0\u6b64\u6211\u7684\u898f\u5283\u662f\u5f9e\u8a8d\u8b58\u4eba\u5de5\u667a\u6167\u958b\u59cb\u5207\u5165\u4e3b\u984c\u3002\u5148\u8b93\u5927\u5bb6\u77e5\u9053\u4f55\u8b02\u4eba\u5de5\u667a\u6167\u4ee5\u53ca\u76f8\u95dc\u61c9\u7528\u6709\u54ea\u4e9b\u3002\u63a5\u8457\u5e36\u5404\u4f4d\u4e86\u89e3\u6210\u70ba\u8cc7\u6599\u79d1\u5b78\u5bb6\u7684\u7b2c\u4e00\u6b65\uff0c\u5c31\u662f\u8cc7\u6599\u5206\u6790\u8207\u8996\u89ba\u5316\uff0c\u518d\u4f86\u6703\u6709\u4e00\u7cfb\u5217\u7d93\u5178\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4ecb\u7d39\u3002\u6700\u5f8c\u4e5f\u662f\u5927\u5bb6\u53ef\u80fd\u6703\u6709\u8208\u8da3\u7684\u6574\u5408\u90e8\u5206\uff0c\u6703\u4ee5\u5be6\u969b\u7684\u5e36\u5927\u5bb6\u624b\u628a\u624b\u90e8\u7f72\u6211\u5011\u7684AI\u6a21\u578b\u4ee5\u53ca\u524d\u5f8c\u7aef\u4e32\u63a5\u7684\u6982\u5ff5\u3002 \u524d\u7f6e\u4f5c\u696d\u8cc7\u6e90 
\u672c\u7cfb\u5217\u6559\u5b78\u5c07\u6709\u5927\u91cf\u7684\u7a0b\u5f0f\u5be6\u4f5c\uff0c\u4e26\u63a1\u7528 Google Colab \u505a\u70ba\u7a0b\u5f0f\u96f2\u7aef\u904b\u884c\u7684\u7de8\u8f2f\u57f7\u884c\u74b0\u5883\u3002\u5404\u4f4d\u53ef\u4ee5\u76f4\u63a5\u5229\u7528 Colab \u958b\u555f\u672c\u7cfb\u5217\u6587\u7ae0\u7684\u7bc4\u4f8b\u7a0b\u5f0f\u3002\u5728\u4f7f\u7528\u6b64\u5e73\u53f0\u4e4b\u524d\u6bcf\u500b\u4eba\u90fd\u5fc5\u9808\u8981\u6709\u81ea\u5df1\u7684 Google \u5e33\u865f\uff0c\u624d\u80fd\u9806\u5229\u7684\u958b\u555f\u4e26\u57f7\u884c\u7a0b\u5f0f\u78bc\u3002Colab \u53ef\u8b93\u4f60\u8f15\u9b06\u5730\u5728\u700f\u89bd\u5668\u4e0a\u64b0\u5beb\u4e26\u57f7\u884c Python \u7a0b\u5f0f\u8a9e\u8a00\uff0c\u5b83\u53ef\u4ee5\u8aaa\u662f\u6a5f\u5668\u5b78\u7fd2\u65b0\u624b\u7684\u5165\u9580\u5de5\u5177\u3002\u6b64\u5916 Colab \u5177\u5099\u4e86\u4ee5\u4e0b\u5e7e\u500b\u512a\u9ede\uff1a \u4e0d\u5fc5\u9032\u884c\u4efb\u4f55\u8a2d\u5b9a\u8207\u5b89\u88dd \u514d\u8cbb\u984d\u5ea6\u4f7f\u7528 GPU\u3001TPU \u8cc7\u6e90 \u8f15\u9b06\u5171\u7528\u8207\u5206\u4eab\u6a94\u6848 \u56e0\u6b64\u8b80\u8005\u5fc5\u9808\u5148\u719f\u6089 Colab \u7684\u64cd\u4f5c\u6a21\u5f0f\uff0c\u60f3\u4e86\u89e3\u8a72\u5982\u4f55\u64cd\u4f5c\u7684\u670b\u53cb\u5011\u53ef\u4ee5\u5148\u4f86\u770b\u9019\u4e00\u6b65 \u5f71\u7247 \u6559\u5b78\u3002\u6216\u662f\u53ef\u4ee5\u95b1\u8b80\u5176\u4ed6\u76f8\u95dc \u6587\u7ae0 \u3002 \u56de\u5831\u932f\u8aa4\u8207\u5efa\u8b70 \u672c\u7cfb\u5217\u6587\u7ae0\u82e5\u6709\u554f\u984c\u6216\u662f\u5167\u5bb9\u5efa\u8b70\u90fd\u53ef\u4ee5\u4f86 GitHub \u4e2d\u7684 issue \u63d0\u51fa\u3002\u6b61\u8fce\u5927\u5bb6\u4e00\u540c\u8ca2\u737b\u70ba\u9019\u7cfb\u5217\u6587\u7ae0\u6709\u66f4\u597d\u7684\u95b1\u8b80\u54c1\u8cea\u3002 \u95dc\u65bc\u4f5c\u8005 
\u66fe\u4efb\u8077\u65bc\u53f0\u7063\u4eba\u5de5\u667a\u6167\u5b78\u6821\uff0c\u64d4\u4efbAI\u5de5\u7a0b\u5e2b\uff0c\u64c1\u6709\u8c50\u5bcc\u7684\u6559\u5b78\u7d93\u9a57\uff0c\u71b1\u8877\u65bc\u7db2\u9801\u524d\u5f8c\u7aef\u6574\u5408\u8207AI\u6f14\u7b97\u6cd5\u7684\u958b\u767c\u3002\u5e0c\u671b\u85c9\u7531\u9435\u4eba\u8cfd\uff0c\u5c07\u6240\u5b78\u8ca2\u737b\u51fa\u4f86\uff0c\u70baAI\u9818\u57df\u63d0\u4f9b\u66f4\u591a\u8cc7\u6e90\u3002 @andy6804tw \u6b61\u8fce\u5927\u5bb6\u8a02\u95b1\u6211\u7684 YouTube \u983b\u9053\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u7c21\u5831 PDF & Code \u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u5168\u6c11\u760bAI\u7cfb\u52172.0"},{"location":"#ai20","text":"\u7b2c13\u5c46iT\u90a6\u5e6b\u5fd9\u9435\u4eba\u8cfd","title":"\u5168\u6c11\u760bAI\u7cfb\u52172.0"},{"location":"#_1","text":"\u54c8\u56c9\u5927\u5bb6\u597d\u6211\u662f10\u7a0b\u5f0f\u4e2d\u768410\uff01\u6211\u662f \u4e0a\u4e00\u5c46 \u9435\u4eba\u8cfd\u5f71\u7247\u6559\u5b78\u7d44 \u5168\u6c11\u760bAI\u7cfb\u5217 
\u7684\u4f5c\u8005\uff0c\u7576\u6642\u8b1b\u89e3\u4e86\u4eba\u5de5\u667a\u6167\u7684\u57fa\u790e\u4ee5\u53ca\u5e38\u898b\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u8207\u624b\u628a\u624b\u6559\u5b78\u3002\u7531\u65bc\u5927\u5bb6\u53cd\u61c9\u5f88\u71b1\u70c8\uff0c\u8b93\u6211\u770b\u5230\u4e86\u5927\u5bb6\u5c0d\u65bcAI\u7684\u5b78\u7fd2\u71b1\u5ff1\u3002\u4e5f\u56e0\u70ba\u4e0a\u4e00\u5c46\u7372\u5f97\u4e86\u5f71\u7247\u6559\u5b78\u7d44\u512a\u9078\uff0c\u6536\u5230\u4e86\u8a31\u591a\u66f8\u5546\u7684\u51fa\u7248\u9080\u8acb\uff0c\u7531\u65bc\u6211\u6c92\u6709\u6642\u9593\u8207\u52d5\u529b\u5c07\u9019\u4e9b\u5927\u91cf\u77e5\u8b58\u5beb\u6210\u6587\u7ae0\u56e0\u6b64\u90fd\u5a49\u62d2\u4e86\u3002\u56e0\u6b64\u6211\u60f3\u85c9\u7531\u9019\u4e00\u6b21\u9435\u4eba\u8cfd\u5c07\u4e0a\u4e00\u5c46\u7684\u5f71\u7247\u5167\u5bb9\u6574\u7406\u6210\u96fb\u5b50\u66f8\u7248\u672c\uff0c\u63d0\u4f9b\u5927\u5bb6\u5f71\u7247\u6559\u5b78\u8207\u6587\u5b57\u7248\u7684\u7b46\u8a18\u5167\u5bb9(\u5537\u547c\u66f8\u5546\u5feb\u770b\u904e\u4f86\uff5e)\u7576\u7136\u5167\u5bb9\u6703\u4ee5\u4e4b\u524d\u5f71\u7247\u6559\u5b78\u70ba\u57fa\u5e95\uff0c\u4e26\u52a0\u5165\u4e00\u4e9b\u65b0\u7684\u5143\u7d20\u8b93\u6587\u7ae0\u5167\u5bb9\u8b8a\u5f97\u66f4\u7d2e\u5be6\u3002\u5728\u5168\u65b0\u7684 \u5168\u6c11\u760bAI\u7cfb\u52172.0 \u4e2d\u6211\u6703\u4ecb\u7d39\u5be6\u7528\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4e26\u542b\u6709\u7a0b\u5f0f\u624b\u628a\u624b\u5be6\u4f5c\uff0c\u4ee5\u53ca\u8fd1\u5e74\u4f86\u71b1\u9580\u7684\u6a5f\u5668\u5b78\u7fd2\u5957\u4ef6\u8207\u6a21\u578b\u8abf\u53c3\u6280\u5de7\u3002\u9664\u6b64\u4e4b\u5916\u6211\u9084\u6703\u63d0\u5230\u5927\u5bb6\u6700\u611f\u8208\u8da3\u7684 AI 
\u6a21\u578b\u843d\u5730\u8207\u6574\u5408\u3002\u5e0c\u671b\u5728\u9019\u6b21\u7684\u9435\u4eba\u8cfd\u80fd\u5920\u5c07AI\u7684\u8cc7\u6e90\u6574\u7406\u5f97\u66f4\u8a73\u7d30\u4e26\u5206\u4eab\u7d66\u5404\u4f4d\u3002","title":"\u524d\u8a00"},{"location":"#_2","text":"\u5982\u679c\u60a8\u662f\u4e4b\u524d\u7684\u820a\u8b80\u8005\uff0c\u6b61\u8fce\u56de\u4f86\u70ba\u81ea\u5df1\u5145\u96fb\uff5e\u65b0\u7684\u7cfb\u5217\u6587\u7ae0\u4fdd\u8b49\u8b93\u4f60\u6536\u7a6b\u6eff\u6eff\uff01\u82e5\u60a8\u662f\u65b0\u4f86\u7684\u8b80\u8005\u6b61\u8fce\u52a0\u5165\u4eba\u5de5\u667a\u6167\u7684\u4e16\u754c\uff0c\u6b64\u7cfb\u5217\u6587\u7ae0\u6b63\u9069\u5408\u521d\u5b78\u8005\u95b1\u8b80\u3002\u53e6\u5916\u5efa\u8b70\u53ef\u4ee5\u642d\u914d\u6211 \u4e0a\u4e00\u5c46 \u9435\u4eba\u8cfd\u7684\u5f71\u7247\u6559\u5b78\u9032\u884c\u5b78\u7fd2\u3002","title":"\u6b64\u7cfb\u5217\u6559\u5b78\u9069\u5408\u8ab0?"},{"location":"#_3","text":"\u5728\u672c\u6b21\u9435\u4eba\u8cfd\u9810\u8a08\u65b0\u589e\u4e86\u8a31\u591a\u65b0\u5167\u5bb9\uff0c\u7279\u5225\u662f\u8fd1\u5e74\u4f86\u6bd4\u8f03\u65b0\u7684\u6f14\u7b97\u6cd5\u5957\u4ef6\uff0c\u4ee5\u53ca\u5728\u6a21\u578b\u8a13\u7df4\u4e2d\u5fc5\u9808\u6ce8\u610f\u7684\u5927\u5c0f\u4e8b\u3002\u672c\u7cfb\u5217\u8981\u5728\u77ed\u77ed30\u5929\u5167\u8b1b\u5b8c\u6240\u6709 AI 
\u9818\u57df\u76f8\u95dc\u61c9\u7528\u662f\u4e0d\u592a\u53ef\u80fd\u7684\u4e8b\u60c5\uff0c\u56e0\u6b64\u6211\u7684\u898f\u5283\u662f\u5f9e\u8a8d\u8b58\u4eba\u5de5\u667a\u6167\u958b\u59cb\u5207\u5165\u4e3b\u984c\u3002\u5148\u8b93\u5927\u5bb6\u77e5\u9053\u4f55\u8b02\u4eba\u5de5\u667a\u6167\u4ee5\u53ca\u76f8\u95dc\u61c9\u7528\u6709\u54ea\u4e9b\u3002\u63a5\u8457\u5e36\u5404\u4f4d\u4e86\u89e3\u6210\u70ba\u8cc7\u6599\u79d1\u5b78\u5bb6\u7684\u7b2c\u4e00\u6b65\uff0c\u5c31\u662f\u8cc7\u6599\u5206\u6790\u8207\u8996\u89ba\u5316\uff0c\u518d\u4f86\u6703\u6709\u4e00\u7cfb\u5217\u7d93\u5178\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4ecb\u7d39\u3002\u6700\u5f8c\u4e5f\u662f\u5927\u5bb6\u53ef\u80fd\u6703\u6709\u8208\u8da3\u7684\u6574\u5408\u90e8\u5206\uff0c\u6703\u4ee5\u5be6\u969b\u7684\u5e36\u5927\u5bb6\u624b\u628a\u624b\u90e8\u7f72\u6211\u5011\u7684AI\u6a21\u578b\u4ee5\u53ca\u524d\u5f8c\u7aef\u4e32\u63a5\u7684\u6982\u5ff5\u3002","title":"\u7cfb\u5217\u6587\u7ae0\u5167\u5bb9\u898f\u5283"},{"location":"#_4","text":"\u672c\u7cfb\u5217\u6559\u5b78\u5c07\u6709\u5927\u91cf\u7684\u7a0b\u5f0f\u5be6\u4f5c\uff0c\u4e26\u63a1\u7528 Google Colab \u505a\u70ba\u7a0b\u5f0f\u96f2\u7aef\u904b\u884c\u7684\u7de8\u8f2f\u57f7\u884c\u74b0\u5883\u3002\u5404\u4f4d\u53ef\u4ee5\u76f4\u63a5\u5229\u7528 Colab \u958b\u555f\u672c\u7cfb\u5217\u6587\u7ae0\u7684\u7bc4\u4f8b\u7a0b\u5f0f\u3002\u5728\u4f7f\u7528\u6b64\u5e73\u53f0\u4e4b\u524d\u6bcf\u500b\u4eba\u90fd\u5fc5\u9808\u8981\u6709\u81ea\u5df1\u7684 Google \u5e33\u865f\uff0c\u624d\u80fd\u9806\u5229\u7684\u958b\u555f\u4e26\u57f7\u884c\u7a0b\u5f0f\u78bc\u3002Colab \u53ef\u8b93\u4f60\u8f15\u9b06\u5730\u5728\u700f\u89bd\u5668\u4e0a\u64b0\u5beb\u4e26\u57f7\u884c Python \u7a0b\u5f0f\u8a9e\u8a00\uff0c\u5b83\u53ef\u4ee5\u8aaa\u662f\u6a5f\u5668\u5b78\u7fd2\u65b0\u624b\u7684\u5165\u9580\u5de5\u5177\u3002\u6b64\u5916 Colab \u5177\u5099\u4e86\u4ee5\u4e0b\u5e7e\u500b\u512a\u9ede\uff1a \u4e0d\u5fc5\u9032\u884c\u4efb\u4f55\u8a2d\u5b9a\u8207\u5b89\u88dd 
\u514d\u8cbb\u984d\u5ea6\u4f7f\u7528 GPU\u3001TPU \u8cc7\u6e90 \u8f15\u9b06\u5171\u7528\u8207\u5206\u4eab\u6a94\u6848 \u56e0\u6b64\u8b80\u8005\u5fc5\u9808\u5148\u719f\u6089 Colab \u7684\u64cd\u4f5c\u6a21\u5f0f\uff0c\u60f3\u4e86\u89e3\u8a72\u5982\u4f55\u64cd\u4f5c\u7684\u670b\u53cb\u5011\u53ef\u4ee5\u5148\u4f86\u770b\u9019\u4e00\u6b65 \u5f71\u7247 \u6559\u5b78\u3002\u6216\u662f\u53ef\u4ee5\u95b1\u8b80\u5176\u4ed6\u76f8\u95dc \u6587\u7ae0 \u3002","title":"\u524d\u7f6e\u4f5c\u696d\u8cc7\u6e90"},{"location":"#_5","text":"\u672c\u7cfb\u5217\u6587\u7ae0\u82e5\u6709\u554f\u984c\u6216\u662f\u5167\u5bb9\u5efa\u8b70\u90fd\u53ef\u4ee5\u4f86 GitHub \u4e2d\u7684 issue \u63d0\u51fa\u3002\u6b61\u8fce\u5927\u5bb6\u4e00\u540c\u8ca2\u737b\u70ba\u9019\u7cfb\u5217\u6587\u7ae0\u6709\u66f4\u597d\u7684\u95b1\u8b80\u54c1\u8cea\u3002","title":"\u56de\u5831\u932f\u8aa4\u8207\u5efa\u8b70"},{"location":"#_6","text":"\u66fe\u4efb\u8077\u65bc\u53f0\u7063\u4eba\u5de5\u667a\u6167\u5b78\u6821\uff0c\u64d4\u4efbAI\u5de5\u7a0b\u5e2b\uff0c\u64c1\u6709\u8c50\u5bcc\u7684\u6559\u5b78\u7d93\u9a57\uff0c\u71b1\u8877\u65bc\u7db2\u9801\u524d\u5f8c\u7aef\u6574\u5408\u8207AI\u6f14\u7b97\u6cd5\u7684\u958b\u767c\u3002\u5e0c\u671b\u85c9\u7531\u9435\u4eba\u8cfd\uff0c\u5c07\u6240\u5b78\u8ca2\u737b\u51fa\u4f86\uff0c\u70baAI\u9818\u57df\u63d0\u4f9b\u66f4\u591a\u8cc7\u6e90\u3002 @andy6804tw \u6b61\u8fce\u5927\u5bb6\u8a02\u95b1\u6211\u7684 YouTube \u983b\u9053\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u7c21\u5831 PDF & Code \u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u95dc\u65bc\u4f5c\u8005"},{"location":"1.\u5168\u6c11\u760bAI\u7cfb\u52172.0\u76ee\u6a19\u4ecb\u7d39/","text":"[Day 1] \u76ee\u6a19\u4ecb\u7d39 \u7b2c13\u5c46iT\u90a6\u5e6b\u5fd9\u9435\u4eba\u8cfd \u524d\u8a00 \u54c8\u56c9\u5927\u5bb6\u597d\u6211\u662f10\u7a0b\u5f0f\u4e2d\u768410\uff01\u6211\u662f \u4e0a\u4e00\u5c46 \u9435\u4eba\u8cfd\u5f71\u7247\u6559\u5b78\u7d44 \u5168\u6c11\u760bAI\u7cfb\u5217 
\u7684\u4f5c\u8005\uff0c\u7576\u6642\u8b1b\u89e3\u4e86\u4eba\u5de5\u667a\u6167\u7684\u57fa\u790e\u4ee5\u53ca\u5e38\u898b\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u8207\u624b\u628a\u624b\u6559\u5b78\u3002\u7531\u65bc\u5927\u5bb6\u53cd\u61c9\u5f88\u71b1\u70c8\uff0c\u8b93\u6211\u770b\u5230\u4e86\u5927\u5bb6\u5c0d\u65bcAI\u7684\u5b78\u7fd2\u71b1\u5ff1\u3002\u4e5f\u56e0\u70ba\u4e0a\u4e00\u5c46\u7372\u5f97\u4e86\u5f71\u7247\u6559\u5b78\u7d44\u512a\u9078\uff0c\u6536\u5230\u4e86\u8a31\u591a\u66f8\u5546\u7684\u51fa\u7248\u9080\u8acb\uff0c\u7531\u65bc\u6211\u6c92\u6709\u6642\u9593\u8207\u52d5\u529b\u5c07\u9019\u4e9b\u5927\u91cf\u77e5\u8b58\u5beb\u6210\u6587\u7ae0\u56e0\u6b64\u90fd\u5a49\u62d2\u4e86\u3002\u56e0\u6b64\u6211\u60f3\u85c9\u7531\u9019\u4e00\u6b21\u9435\u4eba\u8cfd\u5c07\u4e0a\u4e00\u5c46\u7684\u5f71\u7247\u5167\u5bb9\u6574\u7406\u6210\u96fb\u5b50\u66f8\u7248\u672c\uff0c\u63d0\u4f9b\u5927\u5bb6\u5f71\u7247\u6559\u5b78\u8207\u6587\u5b57\u7248\u7684\u7b46\u8a18\u5167\u5bb9(\u5537\u547c\u66f8\u5546\u5feb\u770b\u904e\u4f86\uff5e)\u7576\u7136\u5167\u5bb9\u6703\u4ee5\u4e4b\u524d\u5f71\u7247\u6559\u5b78\u70ba\u57fa\u5e95\uff0c\u4e26\u52a0\u5165\u4e00\u4e9b\u65b0\u7684\u5143\u7d20\u8b93\u6587\u7ae0\u5167\u5bb9\u8b8a\u5f97\u66f4\u7d2e\u5be6\u3002\u5728\u5168\u65b0\u7684 \u5168\u6c11\u760bAI\u7cfb\u52172.0 \u4e2d\u6211\u6703\u4ecb\u7d39\u5be6\u7528\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4e26\u542b\u6709\u7a0b\u5f0f\u624b\u628a\u624b\u5be6\u4f5c\uff0c\u4ee5\u53ca\u8fd1\u5e74\u4f86\u71b1\u9580\u7684\u6a5f\u5668\u5b78\u7fd2\u5957\u4ef6\u8207\u6a21\u578b\u8abf\u53c3\u6280\u5de7\u3002\u9664\u6b64\u4e4b\u5916\u6211\u9084\u6703\u63d0\u5230\u5927\u5bb6\u6700\u611f\u8208\u8da3\u7684 AI \u6a21\u578b\u843d\u5730\u8207\u6574\u5408\u3002\u5e0c\u671b\u5728\u9019\u6b21\u7684\u9435\u4eba\u8cfd\u80fd\u5920\u5c07AI\u7684\u8cc7\u6e90\u6574\u7406\u5f97\u66f4\u8a73\u7d30\u4e26\u5206\u4eab\u7d66\u5404\u4f4d\u3002 \u6b64\u7cfb\u5217\u6559\u5b78\u9069\u5408\u8ab0? 
\u5982\u679c\u60a8\u662f\u4e4b\u524d\u7684\u820a\u8b80\u8005\uff0c\u6b61\u8fce\u56de\u4f86\u70ba\u81ea\u5df1\u5145\u96fb\uff5e\u65b0\u7684\u7cfb\u5217\u6587\u7ae0\u4fdd\u8b49\u8b93\u4f60\u6536\u7a6b\u6eff\u6eff\uff01\u82e5\u60a8\u662f\u65b0\u4f86\u7684\u8b80\u8005\u6b61\u8fce\u52a0\u5165\u4eba\u5de5\u667a\u6167\u7684\u4e16\u754c\uff0c\u6b64\u7cfb\u5217\u6587\u7ae0\u6b63\u9069\u5408\u521d\u5b78\u8005\u95b1\u8b80\u3002\u53e6\u5916\u5efa\u8b70\u53ef\u4ee5\u642d\u914d\u6211 \u4e0a\u4e00\u5c46 \u9435\u4eba\u8cfd\u7684\u5f71\u7247\u6559\u5b78\u9032\u884c\u5b78\u7fd2\u3002 \u7cfb\u5217\u6587\u7ae0\u5167\u5bb9\u898f\u5283 \u5728\u672c\u6b21\u9435\u4eba\u8cfd\u9810\u8a08\u65b0\u589e\u4e86\u8a31\u591a\u65b0\u5167\u5bb9\uff0c\u7279\u5225\u662f\u8fd1\u5e74\u4f86\u6bd4\u8f03\u65b0\u7684\u6f14\u7b97\u6cd5\u5957\u4ef6\uff0c\u4ee5\u53ca\u5728\u6a21\u578b\u8a13\u7df4\u4e2d\u5fc5\u9808\u6ce8\u610f\u7684\u5927\u5c0f\u4e8b\u3002\u672c\u7cfb\u5217\u8981\u5728\u77ed\u77ed30\u5929\u5167\u8b1b\u5b8c\u6240\u6709 AI \u9818\u57df\u76f8\u95dc\u61c9\u7528\u662f\u4e0d\u592a\u53ef\u80fd\u7684\u4e8b\u60c5\uff0c\u56e0\u6b64\u6211\u7684\u898f\u5283\u662f\u5f9e\u8a8d\u8b58\u4eba\u5de5\u667a\u6167\u958b\u59cb\u5207\u5165\u4e3b\u984c\u3002\u5148\u8b93\u5927\u5bb6\u77e5\u9053\u4f55\u8b02\u4eba\u5de5\u667a\u6167\u4ee5\u53ca\u76f8\u95dc\u61c9\u7528\u6709\u54ea\u4e9b\u3002\u63a5\u8457\u5e36\u5404\u4f4d\u4e86\u89e3\u6210\u70ba\u8cc7\u6599\u79d1\u5b78\u5bb6\u7684\u7b2c\u4e00\u6b65\uff0c\u5c31\u662f\u8cc7\u6599\u5206\u6790\u8207\u8996\u89ba\u5316\uff0c\u518d\u4f86\u6703\u6709\u4e00\u7cfb\u5217\u7d93\u5178\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4ecb\u7d39\u3002\u6700\u5f8c\u4e5f\u662f\u5927\u5bb6\u53ef\u80fd\u6703\u6709\u8208\u8da3\u7684\u6574\u5408\u90e8\u5206\uff0c\u6703\u4ee5\u5be6\u969b\u7684\u5e36\u5927\u5bb6\u624b\u628a\u624b\u90e8\u7f72\u6211\u5011\u7684AI\u6a21\u578b\u4ee5\u53ca\u524d\u5f8c\u7aef\u4e32\u63a5\u7684\u6982\u5ff5\u3002 \u524d\u7f6e\u4f5c\u696d\u8cc7\u6e90 
\u672c\u7cfb\u5217\u6559\u5b78\u5c07\u6709\u5927\u91cf\u7684\u7a0b\u5f0f\u5be6\u4f5c\uff0c\u4e26\u63a1\u7528 Google Colab \u505a\u70ba\u7a0b\u5f0f\u96f2\u7aef\u904b\u884c\u7684\u7de8\u8f2f\u57f7\u884c\u74b0\u5883\u3002\u5404\u4f4d\u53ef\u4ee5\u76f4\u63a5\u5229\u7528 Colab \u958b\u555f\u672c\u7cfb\u5217\u6587\u7ae0\u7684\u7bc4\u4f8b\u7a0b\u5f0f\u3002\u5728\u4f7f\u7528\u6b64\u5e73\u53f0\u4e4b\u524d\u6bcf\u500b\u4eba\u90fd\u5fc5\u9808\u8981\u6709\u81ea\u5df1\u7684 Google \u5e33\u865f\uff0c\u624d\u80fd\u9806\u5229\u7684\u958b\u555f\u4e26\u57f7\u884c\u7a0b\u5f0f\u78bc\u3002Colab \u53ef\u8b93\u4f60\u8f15\u9b06\u5730\u5728\u700f\u89bd\u5668\u4e0a\u64b0\u5beb\u4e26\u57f7\u884c Python \u7a0b\u5f0f\u8a9e\u8a00\uff0c\u5b83\u53ef\u4ee5\u8aaa\u662f\u6a5f\u5668\u5b78\u7fd2\u65b0\u624b\u7684\u5165\u9580\u5de5\u5177\u3002\u6b64\u5916 Colab \u5177\u5099\u4e86\u4ee5\u4e0b\u5e7e\u500b\u512a\u9ede\uff1a \u4e0d\u5fc5\u9032\u884c\u4efb\u4f55\u8a2d\u5b9a\u8207\u5b89\u88dd \u514d\u8cbb\u984d\u5ea6\u4f7f\u7528 GPU\u3001TPU \u8cc7\u6e90 \u8f15\u9b06\u5171\u7528\u8207\u5206\u4eab\u6a94\u6848 \u56e0\u6b64\u8b80\u8005\u5fc5\u9808\u5148\u719f\u6089 Colab \u7684\u64cd\u4f5c\u6a21\u5f0f\uff0c\u60f3\u4e86\u89e3\u8a72\u5982\u4f55\u64cd\u4f5c\u7684\u670b\u53cb\u5011\u53ef\u4ee5\u5148\u4f86\u770b\u9019\u4e00\u6b65 \u5f71\u7247 \u6559\u5b78\u3002 \u56de\u5831\u932f\u8aa4\u8207\u5efa\u8b70 \u672c\u7cfb\u5217\u6587\u7ae0\u82e5\u6709\u554f\u984c\u6216\u662f\u5167\u5bb9\u5efa\u8b70\u90fd\u53ef\u4ee5\u4f86 GitHub \u4e2d\u7684 issue \u63d0\u51fa\u3002\u6b61\u8fce\u5927\u5bb6\u4e00\u540c\u8ca2\u737b\u70ba\u9019\u7cfb\u5217\u6587\u7ae0\u6709\u66f4\u597d\u7684\u95b1\u8b80\u54c1\u8cea\u3002 \u95dc\u65bc\u4f5c\u8005 
\u66fe\u4efb\u8077\u65bc\u53f0\u7063\u4eba\u5de5\u667a\u6167\u5b78\u6821\uff0c\u64d4\u4efbAI\u5de5\u7a0b\u5e2b\uff0c\u64c1\u6709\u8c50\u5bcc\u7684\u6559\u5b78\u7d93\u9a57\uff0c\u71b1\u8877\u65bc\u7db2\u9801\u524d\u5f8c\u7aef\u6574\u5408\u8207AI\u6f14\u7b97\u6cd5\u7684\u958b\u767c\u3002\u5e0c\u671b\u85c9\u7531\u9435\u4eba\u8cfd\uff0c\u5c07\u6240\u5b78\u8ca2\u737b\u51fa\u4f86\uff0c\u70baAI\u9818\u57df\u63d0\u4f9b\u66f4\u591a\u8cc7\u6e90\u3002 @andy6804tw \u6b61\u8fce\u5927\u5bb6\u8a02\u95b1\u6211\u7684 YouTube \u983b\u9053\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u7c21\u5831 PDF & Code \u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 1] \u76ee\u6a19\u4ecb\u7d39"},{"location":"1.\u5168\u6c11\u760bAI\u7cfb\u52172.0\u76ee\u6a19\u4ecb\u7d39/#day-1","text":"\u7b2c13\u5c46iT\u90a6\u5e6b\u5fd9\u9435\u4eba\u8cfd","title":"[Day 1] \u76ee\u6a19\u4ecb\u7d39"},{"location":"1.\u5168\u6c11\u760bAI\u7cfb\u52172.0\u76ee\u6a19\u4ecb\u7d39/#_1","text":"\u54c8\u56c9\u5927\u5bb6\u597d\u6211\u662f10\u7a0b\u5f0f\u4e2d\u768410\uff01\u6211\u662f \u4e0a\u4e00\u5c46 \u9435\u4eba\u8cfd\u5f71\u7247\u6559\u5b78\u7d44 \u5168\u6c11\u760bAI\u7cfb\u5217 
\u7684\u4f5c\u8005\uff0c\u7576\u6642\u8b1b\u89e3\u4e86\u4eba\u5de5\u667a\u6167\u7684\u57fa\u790e\u4ee5\u53ca\u5e38\u898b\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u8207\u624b\u628a\u624b\u6559\u5b78\u3002\u7531\u65bc\u5927\u5bb6\u53cd\u61c9\u5f88\u71b1\u70c8\uff0c\u8b93\u6211\u770b\u5230\u4e86\u5927\u5bb6\u5c0d\u65bcAI\u7684\u5b78\u7fd2\u71b1\u5ff1\u3002\u4e5f\u56e0\u70ba\u4e0a\u4e00\u5c46\u7372\u5f97\u4e86\u5f71\u7247\u6559\u5b78\u7d44\u512a\u9078\uff0c\u6536\u5230\u4e86\u8a31\u591a\u66f8\u5546\u7684\u51fa\u7248\u9080\u8acb\uff0c\u7531\u65bc\u6211\u6c92\u6709\u6642\u9593\u8207\u52d5\u529b\u5c07\u9019\u4e9b\u5927\u91cf\u77e5\u8b58\u5beb\u6210\u6587\u7ae0\u56e0\u6b64\u90fd\u5a49\u62d2\u4e86\u3002\u56e0\u6b64\u6211\u60f3\u85c9\u7531\u9019\u4e00\u6b21\u9435\u4eba\u8cfd\u5c07\u4e0a\u4e00\u5c46\u7684\u5f71\u7247\u5167\u5bb9\u6574\u7406\u6210\u96fb\u5b50\u66f8\u7248\u672c\uff0c\u63d0\u4f9b\u5927\u5bb6\u5f71\u7247\u6559\u5b78\u8207\u6587\u5b57\u7248\u7684\u7b46\u8a18\u5167\u5bb9(\u5537\u547c\u66f8\u5546\u5feb\u770b\u904e\u4f86\uff5e)\u7576\u7136\u5167\u5bb9\u6703\u4ee5\u4e4b\u524d\u5f71\u7247\u6559\u5b78\u70ba\u57fa\u5e95\uff0c\u4e26\u52a0\u5165\u4e00\u4e9b\u65b0\u7684\u5143\u7d20\u8b93\u6587\u7ae0\u5167\u5bb9\u8b8a\u5f97\u66f4\u7d2e\u5be6\u3002\u5728\u5168\u65b0\u7684 \u5168\u6c11\u760bAI\u7cfb\u52172.0 \u4e2d\u6211\u6703\u4ecb\u7d39\u5be6\u7528\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4e26\u542b\u6709\u7a0b\u5f0f\u624b\u628a\u624b\u5be6\u4f5c\uff0c\u4ee5\u53ca\u8fd1\u5e74\u4f86\u71b1\u9580\u7684\u6a5f\u5668\u5b78\u7fd2\u5957\u4ef6\u8207\u6a21\u578b\u8abf\u53c3\u6280\u5de7\u3002\u9664\u6b64\u4e4b\u5916\u6211\u9084\u6703\u63d0\u5230\u5927\u5bb6\u6700\u611f\u8208\u8da3\u7684 AI 
\u6a21\u578b\u843d\u5730\u8207\u6574\u5408\u3002\u5e0c\u671b\u5728\u9019\u6b21\u7684\u9435\u4eba\u8cfd\u80fd\u5920\u5c07AI\u7684\u8cc7\u6e90\u6574\u7406\u5f97\u66f4\u8a73\u7d30\u4e26\u5206\u4eab\u7d66\u5404\u4f4d\u3002","title":"\u524d\u8a00"},{"location":"1.\u5168\u6c11\u760bAI\u7cfb\u52172.0\u76ee\u6a19\u4ecb\u7d39/#_2","text":"\u5982\u679c\u60a8\u662f\u4e4b\u524d\u7684\u820a\u8b80\u8005\uff0c\u6b61\u8fce\u56de\u4f86\u70ba\u81ea\u5df1\u5145\u96fb\uff5e\u65b0\u7684\u7cfb\u5217\u6587\u7ae0\u4fdd\u8b49\u8b93\u4f60\u6536\u7a6b\u6eff\u6eff\uff01\u82e5\u60a8\u662f\u65b0\u4f86\u7684\u8b80\u8005\u6b61\u8fce\u52a0\u5165\u4eba\u5de5\u667a\u6167\u7684\u4e16\u754c\uff0c\u6b64\u7cfb\u5217\u6587\u7ae0\u6b63\u9069\u5408\u521d\u5b78\u8005\u95b1\u8b80\u3002\u53e6\u5916\u5efa\u8b70\u53ef\u4ee5\u642d\u914d\u6211 \u4e0a\u4e00\u5c46 \u9435\u4eba\u8cfd\u7684\u5f71\u7247\u6559\u5b78\u9032\u884c\u5b78\u7fd2\u3002","title":"\u6b64\u7cfb\u5217\u6559\u5b78\u9069\u5408\u8ab0?"},{"location":"1.\u5168\u6c11\u760bAI\u7cfb\u52172.0\u76ee\u6a19\u4ecb\u7d39/#_3","text":"\u5728\u672c\u6b21\u9435\u4eba\u8cfd\u9810\u8a08\u65b0\u589e\u4e86\u8a31\u591a\u65b0\u5167\u5bb9\uff0c\u7279\u5225\u662f\u8fd1\u5e74\u4f86\u6bd4\u8f03\u65b0\u7684\u6f14\u7b97\u6cd5\u5957\u4ef6\uff0c\u4ee5\u53ca\u5728\u6a21\u578b\u8a13\u7df4\u4e2d\u5fc5\u9808\u6ce8\u610f\u7684\u5927\u5c0f\u4e8b\u3002\u672c\u7cfb\u5217\u8981\u5728\u77ed\u77ed30\u5929\u5167\u8b1b\u5b8c\u6240\u6709 AI 
\u9818\u57df\u76f8\u95dc\u61c9\u7528\u662f\u4e0d\u592a\u53ef\u80fd\u7684\u4e8b\u60c5\uff0c\u56e0\u6b64\u6211\u7684\u898f\u5283\u662f\u5f9e\u8a8d\u8b58\u4eba\u5de5\u667a\u6167\u958b\u59cb\u5207\u5165\u4e3b\u984c\u3002\u5148\u8b93\u5927\u5bb6\u77e5\u9053\u4f55\u8b02\u4eba\u5de5\u667a\u6167\u4ee5\u53ca\u76f8\u95dc\u61c9\u7528\u6709\u54ea\u4e9b\u3002\u63a5\u8457\u5e36\u5404\u4f4d\u4e86\u89e3\u6210\u70ba\u8cc7\u6599\u79d1\u5b78\u5bb6\u7684\u7b2c\u4e00\u6b65\uff0c\u5c31\u662f\u8cc7\u6599\u5206\u6790\u8207\u8996\u89ba\u5316\uff0c\u518d\u4f86\u6703\u6709\u4e00\u7cfb\u5217\u7d93\u5178\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4ecb\u7d39\u3002\u6700\u5f8c\u4e5f\u662f\u5927\u5bb6\u53ef\u80fd\u6703\u6709\u8208\u8da3\u7684\u6574\u5408\u90e8\u5206\uff0c\u6703\u4ee5\u5be6\u969b\u7684\u5e36\u5927\u5bb6\u624b\u628a\u624b\u90e8\u7f72\u6211\u5011\u7684AI\u6a21\u578b\u4ee5\u53ca\u524d\u5f8c\u7aef\u4e32\u63a5\u7684\u6982\u5ff5\u3002","title":"\u7cfb\u5217\u6587\u7ae0\u5167\u5bb9\u898f\u5283"},{"location":"1.\u5168\u6c11\u760bAI\u7cfb\u52172.0\u76ee\u6a19\u4ecb\u7d39/#_4","text":"\u672c\u7cfb\u5217\u6559\u5b78\u5c07\u6709\u5927\u91cf\u7684\u7a0b\u5f0f\u5be6\u4f5c\uff0c\u4e26\u63a1\u7528 Google Colab \u505a\u70ba\u7a0b\u5f0f\u96f2\u7aef\u904b\u884c\u7684\u7de8\u8f2f\u57f7\u884c\u74b0\u5883\u3002\u5404\u4f4d\u53ef\u4ee5\u76f4\u63a5\u5229\u7528 Colab \u958b\u555f\u672c\u7cfb\u5217\u6587\u7ae0\u7684\u7bc4\u4f8b\u7a0b\u5f0f\u3002\u5728\u4f7f\u7528\u6b64\u5e73\u53f0\u4e4b\u524d\u6bcf\u500b\u4eba\u90fd\u5fc5\u9808\u8981\u6709\u81ea\u5df1\u7684 Google \u5e33\u865f\uff0c\u624d\u80fd\u9806\u5229\u7684\u958b\u555f\u4e26\u57f7\u884c\u7a0b\u5f0f\u78bc\u3002Colab \u53ef\u8b93\u4f60\u8f15\u9b06\u5730\u5728\u700f\u89bd\u5668\u4e0a\u64b0\u5beb\u4e26\u57f7\u884c Python \u7a0b\u5f0f\u8a9e\u8a00\uff0c\u5b83\u53ef\u4ee5\u8aaa\u662f\u6a5f\u5668\u5b78\u7fd2\u65b0\u624b\u7684\u5165\u9580\u5de5\u5177\u3002\u6b64\u5916 Colab \u5177\u5099\u4e86\u4ee5\u4e0b\u5e7e\u500b\u512a\u9ede\uff1a 
\u4e0d\u5fc5\u9032\u884c\u4efb\u4f55\u8a2d\u5b9a\u8207\u5b89\u88dd \u514d\u8cbb\u984d\u5ea6\u4f7f\u7528 GPU\u3001TPU \u8cc7\u6e90 \u8f15\u9b06\u5171\u7528\u8207\u5206\u4eab\u6a94\u6848 \u56e0\u6b64\u8b80\u8005\u5fc5\u9808\u5148\u719f\u6089 Colab \u7684\u64cd\u4f5c\u6a21\u5f0f\uff0c\u60f3\u4e86\u89e3\u8a72\u5982\u4f55\u64cd\u4f5c\u7684\u670b\u53cb\u5011\u53ef\u4ee5\u5148\u4f86\u770b\u9019\u4e00\u6b65 \u5f71\u7247 \u6559\u5b78\u3002","title":"\u524d\u7f6e\u4f5c\u696d\u8cc7\u6e90"},{"location":"1.\u5168\u6c11\u760bAI\u7cfb\u52172.0\u76ee\u6a19\u4ecb\u7d39/#_5","text":"\u672c\u7cfb\u5217\u6587\u7ae0\u82e5\u6709\u554f\u984c\u6216\u662f\u5167\u5bb9\u5efa\u8b70\u90fd\u53ef\u4ee5\u4f86 GitHub \u4e2d\u7684 issue \u63d0\u51fa\u3002\u6b61\u8fce\u5927\u5bb6\u4e00\u540c\u8ca2\u737b\u70ba\u9019\u7cfb\u5217\u6587\u7ae0\u6709\u66f4\u597d\u7684\u95b1\u8b80\u54c1\u8cea\u3002","title":"\u56de\u5831\u932f\u8aa4\u8207\u5efa\u8b70"},{"location":"1.\u5168\u6c11\u760bAI\u7cfb\u52172.0\u76ee\u6a19\u4ecb\u7d39/#_6","text":"\u66fe\u4efb\u8077\u65bc\u53f0\u7063\u4eba\u5de5\u667a\u6167\u5b78\u6821\uff0c\u64d4\u4efbAI\u5de5\u7a0b\u5e2b\uff0c\u64c1\u6709\u8c50\u5bcc\u7684\u6559\u5b78\u7d93\u9a57\uff0c\u71b1\u8877\u65bc\u7db2\u9801\u524d\u5f8c\u7aef\u6574\u5408\u8207AI\u6f14\u7b97\u6cd5\u7684\u958b\u767c\u3002\u5e0c\u671b\u85c9\u7531\u9435\u4eba\u8cfd\uff0c\u5c07\u6240\u5b78\u8ca2\u737b\u51fa\u4f86\uff0c\u70baAI\u9818\u57df\u63d0\u4f9b\u66f4\u591a\u8cc7\u6e90\u3002 @andy6804tw \u6b61\u8fce\u5927\u5bb6\u8a02\u95b1\u6211\u7684 YouTube \u983b\u9053\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u7c21\u5831 PDF & Code \u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u95dc\u65bc\u4f5c\u8005"},{"location":"10.KNN/","text":"[Day 10] \u8fd1\u6731\u8005\u8d64\uff0c\u8fd1\u58a8\u8005\u9ed1 - KNN \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 K-\u8fd1\u9130\u6f14\u7b97\u6cd5\u4ecb\u7d39 KNN \u6f14\u7b97\u6cd5\u89e3\u6790 KNN \u65bc\u5206\u985e\u5668\u548c\u8ff4\u6b78\u5668\u7684\u505a\u6cd5 
\u6bd4\u8f03 KNN \u8207 k-means \u5dee\u7570 \u5be6\u4f5c KNN \u5206\u985e\u5668\u8207\u8ff4\u6b78\u5668 \u5be6\u4f5c KNN \u5206\u985e\u5668\uff0c\u89c0\u5bdf\u4e0d\u540c k \u503c\u6703\u5c0d\u5206\u985e\u7d50\u679c\u9020\u6210\u4ec0\u9ebc\u5f71\u97ff \u5be6\u4f5c KNN \u8ff4\u6b78\u8ff4\u5668 \u7bc4\u4f8b\u7a0b\u5f0f KNN(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f KNN(Regression)\uff1a K-\u8fd1\u9130\u6f14\u7b97\u6cd5 (KNN) KNN \u7684\u5168\u540d K Nearest Neighbor \u662f\u5c6c\u65bc\u6a5f\u5668\u5b78\u7fd2\u4e2d\u7684 Supervised learning \u5176\u4e2d\u4e00\u7a2e\u7b97\u6cd5\uff0c\u9867\u540d\u601d\u7fa9\u5c31\u662f k \u500b\u6700\u63a5\u8fd1\u4f60\u7684 \u9130\u5c45 \u3002\u5206\u985e\u7684\u6a19\u6e96\u662f\u7531\u9130\u5c45\u300c\u591a\u6578\u8868\u6c7a\u300d\u6c7a\u5b9a\u7684\u3002\u5728 Sklearn \u4e2d KNN \u53ef\u4ee5\u7528\u4f5c\u5206\u985e\u6216\u8ff4\u6b78\u7684\u6a21\u578b\u3002 KNN \u5206\u985e\u5668 \u5728\u5206\u985e\u554f\u984c\u4e2d KNN \u6f14\u7b97\u6cd5\u63a1\u591a\u6578\u6c7a\u6a19\u6e96\uff0c\u5229\u7528 k \u500b\u6700\u8fd1\u7684\u9130\u5c45\u4f86\u5224\u5b9a\u65b0\u7684\u8cc7\u6599\u662f\u5728\u54ea\u4e00\u7fa4\u3002\u5176\u6f14\u7b97\u6cd5\u6d41\u7a0b\u975e\u5e38\u7c21\u55ae\uff0c\u9996\u5148\u4f7f\u7528\u8005\u5148\u6c7a\u5b9a k \u7684\u5927\u5c0f\u3002\u63a5\u8457\u8a08\u7b97\u76ee\u524d\u8a72\u7b46\u65b0\u7684\u8cc7\u6599\u8207\u9130\u8fd1\u7684\u8cc7\u6599\u9593\u7684\u8ddd\u96e2\u3002\u7b2c\u4e09\u6b65\u627e\u51fa\u8ddf\u81ea\u5df1\u6700\u8fd1\u7684 k \u500b\u9130\u5c45\uff0c\u67e5\u770b\u54ea\u4e00\u7d44\u9130\u5c45\u6578\u91cf\u6700\u591a\uff0c\u5c31\u52a0\u5165\u54ea\u4e00\u7d44\u3002 \u6c7a\u5b9a k \u503c \u6c42\u6bcf\u500b\u9130\u5c45\u8ddf\u81ea\u5df1\u4e4b\u9593\u7684\u8ddd\u96e2 \u627e\u51fa\u8ddf\u81ea\u5df1\u6700\u8fd1\u7684 k \u500b\u9130\u5c45\uff0c\u67e5\u770b\u54ea\u4e00\u7d44\u9130\u5c45\u6578\u91cf\u6700\u591a\uff0c\u5c31\u52a0\u5165\u54ea\u4e00\u7d44 
\u5982\u679c\u9084\u662f\u6c92\u8fa6\u6cd5\u6c7a\u5b9a\u5728\u54ea\u4e00\u7d44\uff0c\u56de\u5230\u7b2c\u4e00\u6b65\u8abf\u6574 k \u503c\uff0c\u518d\u7e7c\u7e8c k \u7684\u5927\u5c0f\u6703\u5f71\u97ff\u6a21\u578b\u6700\u7d42\u7684\u5206\u985e\u7d50\u679c\u3002\u4ee5\u4e0b\u5716\u70ba\u4f8b\uff0c\u5047\u8a2d\u7da0\u8272\u9ede\u662f\u65b0\u7684\u8cc7\u6599\u3002\u7576 k \u7b49\u65bc 3 \u6642\u6703\u641c\u5c0b\u96e2\u7da0\u8272\u9ede\u6700\u8fd1\u7684\u9130\u5c45\uff0c\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u85cd\u8272\u4e09\u89d2\u5f62\u70ba\u9810\u6e2c\u7684\u7d50\u679c\u3002\u7576 k \u8a2d\u70ba 5 \u7684\u6642\u5019\u7d50\u679c\u53c8\u4e0d\u4e00\u6a23\u4e86\uff0c\u6211\u5011\u767c\u73fe\u8ddd\u96e2\u6700\u8fd1\u7684\u4e09\u500b\u9130\u5c45\u70ba\u7d05\u8272\u6b63\u65b9\u5f62\u3002 KNN \u8ff4\u6b78\u5668 KNN \u540c\u6642\u4e5f\u80fd\u904b\u7528\u5728\u8ff4\u6b78\u554f\u984c\u4e0a\u9762\u3002\u8ff4\u6b78\u6a21\u578b\u8f38\u51fa\u7684\u7d50\u679c\u662f\u4e00\u500b\u9023\u7e8c\u6027\u6578\u503c\uff0c\u5176\u9810\u6e2c\u8a72\u503c\u662f k \u500b\u6700\u8fd1\u9130\u5c45\u8f38\u51fa\u7684\u5e73\u5747\u503c\u3002\u4ee5\u4e0b\u5716\u70ba\u4f8b\u7576 k=2 \u6642\uff0c\u5047\u8a2d\u6211\u5011\u6709\u4e00\u500b\u8f38\u5165\u7279\u5fb5 x \u8981\u9810\u6e2c\u7684\u8f38\u51fa\u70ba y\u3002\u7576\u6709\u4e00\u7b46\u65b0\u7684 x \u9032\u4f86\u7684\u6642\u5019\uff0c KNN \u8ff4\u6b78\u5668\u6703\u5c0b\u627e\u9130\u8fd1 2 \u500b x \u7684\u8f38\u51fa\u505a\u5e73\u5747\u7576\u4f5c\u662f\u8a72\u7b46\u8cc7\u6599\u7684\u9810\u6e2c\u7d50\u679c\u3002 KNN \u5ea6\u91cf\u8ddd\u96e2\u7684\u65b9\u6cd5 \u8981\u5224\u65b7\u90a3\u4e9b\u662f\u9130\u5c45\u7684\u8a71\uff0c\u9996\u5148\u8981\u91cf\u5316\u76f8\u4f3c\u5ea6\uff0c\u800c\u6b50\u5e7e\u91cc\u5f97\u8ddd\u96e2 (Euclidean distance) \u662f\u6bd4\u8f03\u5e38\u7528\u7684\u65b9\u6cd5\u4f86\u91cf\u5ea6\u76f8\u4f3c\u5ea6\u3002\u9664\u6b64\u4e4b\u5916\u9084\u6709\u660e\u53ef\u592b\u65af\u57fa\u8ddd\u96e2(Sklearn 
\u9810\u8a2d)\u3001\u66fc\u54c8\u9813\u8ddd\u96e2\u3001\u67f4\u6bd4\u96ea\u592b\u8ddd\u96e2\u3001\u593e\u89d2\u9918\u5f26\u3001\u6f22\u660e\u8ddd\u96e2\u3001\u5091\u5361\u5fb7\u76f8\u4f3c\u4fc2\u6578 \u90fd\u53ef\u4ee5\u8a55\u4f30\u8ddd\u96e2\u7684\u9060\u8fd1\u3002 KNN \u8207 k-means \u52ff\u6df7\u6dc6 KNN \u7684\u7f3a\u9ede\u662f\u5c0d\u8cc7\u6599\u7684\u5c40\u90e8\u7d50\u69cb\u975e\u5e38\u654f\u611f\uff0c\u56e0\u6b64\u8abf\u6574\u9069\u7576\u7684 k \u503c\u6975\u70ba\u91cd\u8981\u3002\u53e6\u5916\u5927\u5bb6\u5f88\u5e38\u5c07 KNN \u8207 K-means \u6df7\u6dc6\uff0c\u96d6\u7136\u5169\u8005\u90fd\u6709 k \u503c\u8981\u8a2d\u5b9a\u4f46\u5176\u5be6\u5169\u8005\u7121\u4efb\u4f55\u95dc\u806f\u3002KNN \u7684 k \u662f\u8a2d\u5b9a\u9130\u5c45\u7684\u6578\u91cf\u63a1\u591a\u6578\u6c7a\u4f5c\u70ba\u8f38\u51fa\u7684\u4f9d\u64da\u3002\u800c K-means \u7684 k \u662f\u8a2d\u5b9a\u96c6\u7fa4\u7684\u985e\u5225\u4e2d\u5fc3\u9ede\u6578\u91cf\u3002 [\u7a0b\u5f0f\u5be6\u4f5c] KNN \u5206\u985e\u5668 \u63a1\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u505a\u70ba\u5206\u985e\u7bc4\u4f8b\uff0c\u4f7f\u7528 Sklearn \u5efa\u7acb k-nearest neighbors(KNN) \u6a21\u578b\u3002\u4ee5\u4e0b\u662f KNN \u5e38\u898b\u7684\u6a21\u578b\u64cd\u4f5c\u53c3\u6578\uff1a Parameters: - n_neighbors: \u8a2d\u5b9a\u9130\u5c45\u7684\u6578\u91cf(k)\uff0c\u9078\u53d6\u6700\u8fd1\u7684k\u500b\u9ede\uff0c\u9810\u8a2d\u70ba5\u3002 - algorithm: \u641c\u5c0b\u6578\u6f14\u7b97\u6cd5{'auto'\uff0c'ball_tree'\uff0c'kd_tree'\uff0c'brute'}\uff0c\u53ef\u9078\u3002 - metric: \u8a08\u7b97\u8ddd\u96e2\u7684\u65b9\u5f0f\uff0c\u9810\u8a2d\u70ba\u6b50\u5e7e\u91cc\u5f97\u8ddd\u96e2\u3002 Attributes: - classes_: \u53d6\u5f97\u985e\u5225\u9663\u5217\u3002 - effective_metric_: \u53d6\u5f97\u8a08\u7b97\u8ddd\u96e2\u7684\u516c\u5f0f\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: 
\u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 from sklearn.neighbors import KNeighborsClassifier # \u5efa\u7acb KNN \u6a21\u578b knnModel = KNeighborsClassifier ( n_neighbors = 3 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b knnModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = knnModel . predict ( X_train ) \u4f7f\u7528Score\u8a55\u4f30\u6a21\u578b \u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb score() \u76f4\u63a5\u8a08\u7b97\u6a21\u578b\u9810\u6e2c\u7684\u6e96\u78ba\u7387\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( '\u8a13\u7df4\u96c6: ' , knnModel . score ( X_train , y_train )) print ( '\u6e2c\u8a66\u96c6: ' , knnModel . score ( X_test , y_test )) \u57f7\u884c\u7d50\u679c\uff1a \u8a13\u7df4\u96c6: 0.9619047619047619 \u6e2c\u8a66\u96c6: 0.9555555555555556 \u6211\u5011\u53ef\u4ee5\u67e5\u770b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5728\u6e2c\u8a66\u96c6\u4e0a\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u4e0b\u5716\u4e2d\u5de6\u908a\u7684\u662f\u6e2c\u8a66\u96c6\u7684\u771f\u5be6\u5206\u985e\uff0c\u53f3\u908a\u7684\u662f\u6a21\u578b\u9810\u6e2c\u7684\u5206\u985e\u7d50\u679c\u3002\u5f9e\u5716\u4e2d\u53ef\u4ee5\u767c\u73fe\u85cd\u8272\u7684 Setosa \u5b8c\u6574\u7684\u88ab\u5206\u985e\u51fa\u4f86\uff0c\u800c\u6a58\u8272\u8207\u7da0\u8272\u7684\u5206\u4f48\u662f\u7dca\u5bc6\u76f8\u9023\u5728\u4ea4\u754c\u8655\u5206\u985e\u7684\u7d50\u679c\u6bd4\u8f03\u4e0d\u7a69\u5b9a\u3002\u4f46\u6700\u7d42\u9810\u6e2c\u7d50\u679c\u7d50\u679c\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u90fd\u6709\u767e\u5206\u4e4b95\u4ee5\u4e0a\u7684\u6e96\u78ba\u7387\u3002 KNN \u8ff4\u6b78\u5668 KNN \u4e0d\u50c5\u80fd\u5920\u4f5c\u70ba\u5206\u985e\u5668\uff0c\u4e5f\u53ef\u4ee5\u505a\u8ff4\u6b78\u9023\u7e8c\u6027\u7684\u6578\u503c\u9810\u6e2c\u3002\u5176\u9810\u6e2c\u503c\u70bak\u500b\u6700\u8fd1\u9130\u5c45\u7684\u503c\u7684\u5e73\u5747\u503c\u3002 Parameters: - n_neighbors: 
\u8a2d\u5b9a\u9130\u5c45\u7684\u6578\u91cf(k)\uff0c\u9078\u53d6\u6700\u8fd1\u7684k\u500b\u9ede\uff0c\u9810\u8a2d\u70ba5\u3002 - algorithm: \u641c\u5c0b\u6578\u6f14\u7b97\u6cd5{'auto'\uff0c'ball_tree'\uff0c'kd_tree'\uff0c'brute'}\uff0c\u53ef\u9078\u3002 - metric: \u8a08\u7b97\u8ddd\u96e2\u7684\u65b9\u5f0f\uff0c\u9810\u8a2d\u70ba\u6b50\u5e7e\u91cc\u5f97\u8ddd\u96e2\u3002 Attributes: - classes_: \u53d6\u5f97\u985e\u5225\u9663\u5217\u3002 - effective_metric_: \u53d6\u5f97\u8a08\u7b97\u8ddd\u96e2\u7684\u516c\u5f0f\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 from sklearn.neighbors import KNeighborsRegressor # \u5efa\u7acb KNN \u6a21\u578b knnModel = KNeighborsRegressor ( n_neighbors = 3 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b knnModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = knnModel . predict ( x ) \u6a21\u578b\u8a55\u4f30 Sklearn \u4e2d KNN \u8ff4\u6b78\u6a21\u578b\u7684 score \u51fd\u5f0f\u662f R2 score\uff0c\u53ef\u4f5c\u70ba\u6a21\u578b\u8a55\u4f30\u4f9d\u64da\uff0c\u5176\u6578\u503c\u8d8a\u63a5\u8fd1\u65bc1\u4ee3\u8868\u6a21\u578b\u8d8a\u4f73\u3002\u9664\u4e86 R2 score \u9084\u6709\u5176\u4ed6\u8a31\u591a\u8ff4\u6b78\u6a21\u578b\u7684\u8a55\u4f30\u65b9\u6cd5\uff0c\u4f8b\u5982\uff1a MSE\u3001MAE\u3001RMSE\u3002 from sklearn import metrics print ( 'R2 score: ' , knnModel . score ( x , y )) mse = metrics . 
mean_squared_error ( y , predicted ) print ( 'MSE score: ' , mse ) \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 10] \u8fd1\u6731\u8005\u8d64\uff0c\u8fd1\u58a8\u8005\u9ed1 - KNN"},{"location":"10.KNN/#day-10-knn","text":"","title":"[Day 10] \u8fd1\u6731\u8005\u8d64\uff0c\u8fd1\u58a8\u8005\u9ed1 - KNN"},{"location":"10.KNN/#_1","text":"K-\u8fd1\u9130\u6f14\u7b97\u6cd5\u4ecb\u7d39 KNN \u6f14\u7b97\u6cd5\u89e3\u6790 KNN \u65bc\u5206\u985e\u5668\u548c\u8ff4\u6b78\u5668\u7684\u505a\u6cd5 \u6bd4\u8f03 KNN \u8207 k-means \u5dee\u7570 \u5be6\u4f5c KNN \u5206\u985e\u5668\u8207\u8ff4\u6b78\u5668 \u5be6\u4f5c KNN \u5206\u985e\u5668\uff0c\u89c0\u5bdf\u4e0d\u540c k \u503c\u6703\u5c0d\u5206\u985e\u7d50\u679c\u9020\u6210\u4ec0\u9ebc\u5f71\u97ff \u5be6\u4f5c KNN \u8ff4\u6b78\u8ff4\u5668 \u7bc4\u4f8b\u7a0b\u5f0f KNN(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f KNN(Regression)\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"10.KNN/#k-knn","text":"KNN \u7684\u5168\u540d K Nearest Neighbor \u662f\u5c6c\u65bc\u6a5f\u5668\u5b78\u7fd2\u4e2d\u7684 Supervised learning \u5176\u4e2d\u4e00\u7a2e\u7b97\u6cd5\uff0c\u9867\u540d\u601d\u7fa9\u5c31\u662f k \u500b\u6700\u63a5\u8fd1\u4f60\u7684 \u9130\u5c45 \u3002\u5206\u985e\u7684\u6a19\u6e96\u662f\u7531\u9130\u5c45\u300c\u591a\u6578\u8868\u6c7a\u300d\u6c7a\u5b9a\u7684\u3002\u5728 Sklearn \u4e2d KNN \u53ef\u4ee5\u7528\u4f5c\u5206\u985e\u6216\u8ff4\u6b78\u7684\u6a21\u578b\u3002","title":"K-\u8fd1\u9130\u6f14\u7b97\u6cd5 (KNN)"},{"location":"10.KNN/#knn","text":"\u5728\u5206\u985e\u554f\u984c\u4e2d KNN \u6f14\u7b97\u6cd5\u63a1\u591a\u6578\u6c7a\u6a19\u6e96\uff0c\u5229\u7528 k \u500b\u6700\u8fd1\u7684\u9130\u5c45\u4f86\u5224\u5b9a\u65b0\u7684\u8cc7\u6599\u662f\u5728\u54ea\u4e00\u7fa4\u3002\u5176\u6f14\u7b97\u6cd5\u6d41\u7a0b\u975e\u5e38\u7c21\u55ae\uff0c\u9996\u5148\u4f7f\u7528\u8005\u5148\u6c7a\u5b9a k 
\u7684\u5927\u5c0f\u3002\u63a5\u8457\u8a08\u7b97\u76ee\u524d\u8a72\u7b46\u65b0\u7684\u8cc7\u6599\u8207\u9130\u8fd1\u7684\u8cc7\u6599\u9593\u7684\u8ddd\u96e2\u3002\u7b2c\u4e09\u6b65\u627e\u51fa\u8ddf\u81ea\u5df1\u6700\u8fd1\u7684 k \u500b\u9130\u5c45\uff0c\u67e5\u770b\u54ea\u4e00\u7d44\u9130\u5c45\u6578\u91cf\u6700\u591a\uff0c\u5c31\u52a0\u5165\u54ea\u4e00\u7d44\u3002 \u6c7a\u5b9a k \u503c \u6c42\u6bcf\u500b\u9130\u5c45\u8ddf\u81ea\u5df1\u4e4b\u9593\u7684\u8ddd\u96e2 \u627e\u51fa\u8ddf\u81ea\u5df1\u6700\u8fd1\u7684 k \u500b\u9130\u5c45\uff0c\u67e5\u770b\u54ea\u4e00\u7d44\u9130\u5c45\u6578\u91cf\u6700\u591a\uff0c\u5c31\u52a0\u5165\u54ea\u4e00\u7d44 \u5982\u679c\u9084\u662f\u6c92\u8fa6\u6cd5\u6c7a\u5b9a\u5728\u54ea\u4e00\u7d44\uff0c\u56de\u5230\u7b2c\u4e00\u6b65\u8abf\u6574 k \u503c\uff0c\u518d\u7e7c\u7e8c k \u7684\u5927\u5c0f\u6703\u5f71\u97ff\u6a21\u578b\u6700\u7d42\u7684\u5206\u985e\u7d50\u679c\u3002\u4ee5\u4e0b\u5716\u70ba\u4f8b\uff0c\u5047\u8a2d\u7da0\u8272\u9ede\u662f\u65b0\u7684\u8cc7\u6599\u3002\u7576 k \u7b49\u65bc 3 \u6642\u6703\u641c\u5c0b\u96e2\u7da0\u8272\u9ede\u6700\u8fd1\u7684\u9130\u5c45\uff0c\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u85cd\u8272\u4e09\u89d2\u5f62\u70ba\u9810\u6e2c\u7684\u7d50\u679c\u3002\u7576 k \u8a2d\u70ba 5 \u7684\u6642\u5019\u7d50\u679c\u53c8\u4e0d\u4e00\u6a23\u4e86\uff0c\u6211\u5011\u767c\u73fe\u8ddd\u96e2\u6700\u8fd1\u7684\u4e09\u500b\u9130\u5c45\u70ba\u7d05\u8272\u6b63\u65b9\u5f62\u3002","title":"KNN \u5206\u985e\u5668"},{"location":"10.KNN/#knn_1","text":"KNN \u540c\u6642\u4e5f\u80fd\u904b\u7528\u5728\u8ff4\u6b78\u554f\u984c\u4e0a\u9762\u3002\u8ff4\u6b78\u6a21\u578b\u8f38\u51fa\u7684\u7d50\u679c\u662f\u4e00\u500b\u9023\u7e8c\u6027\u6578\u503c\uff0c\u5176\u9810\u6e2c\u8a72\u503c\u662f k \u500b\u6700\u8fd1\u9130\u5c45\u8f38\u51fa\u7684\u5e73\u5747\u503c\u3002\u4ee5\u4e0b\u5716\u70ba\u4f8b\u7576 k=2 \u6642\uff0c\u5047\u8a2d\u6211\u5011\u6709\u4e00\u500b\u8f38\u5165\u7279\u5fb5 x \u8981\u9810\u6e2c\u7684\u8f38\u51fa\u70ba 
y\u3002\u7576\u6709\u4e00\u7b46\u65b0\u7684 x \u9032\u4f86\u7684\u6642\u5019\uff0c KNN \u8ff4\u6b78\u5668\u6703\u5c0b\u627e\u9130\u8fd1 2 \u500b x \u7684\u8f38\u51fa\u505a\u5e73\u5747\u7576\u4f5c\u662f\u8a72\u7b46\u8cc7\u6599\u7684\u9810\u6e2c\u7d50\u679c\u3002","title":"KNN \u8ff4\u6b78\u5668"},{"location":"10.KNN/#knn_2","text":"\u8981\u5224\u65b7\u90a3\u4e9b\u662f\u9130\u5c45\u7684\u8a71\uff0c\u9996\u5148\u8981\u91cf\u5316\u76f8\u4f3c\u5ea6\uff0c\u800c\u6b50\u5e7e\u91cc\u5f97\u8ddd\u96e2 (Euclidean distance) \u662f\u6bd4\u8f03\u5e38\u7528\u7684\u65b9\u6cd5\u4f86\u91cf\u5ea6\u76f8\u4f3c\u5ea6\u3002\u9664\u6b64\u4e4b\u5916\u9084\u6709\u660e\u53ef\u592b\u65af\u57fa\u8ddd\u96e2(Sklearn \u9810\u8a2d)\u3001\u66fc\u54c8\u9813\u8ddd\u96e2\u3001\u67f4\u6bd4\u96ea\u592b\u8ddd\u96e2\u3001\u593e\u89d2\u9918\u5f26\u3001\u6f22\u660e\u8ddd\u96e2\u3001\u5091\u5361\u5fb7\u76f8\u4f3c\u4fc2\u6578 \u90fd\u53ef\u4ee5\u8a55\u4f30\u8ddd\u96e2\u7684\u9060\u8fd1\u3002","title":"KNN \u5ea6\u91cf\u8ddd\u96e2\u7684\u65b9\u6cd5"},{"location":"10.KNN/#knn-k-means","text":"KNN \u7684\u7f3a\u9ede\u662f\u5c0d\u8cc7\u6599\u7684\u5c40\u90e8\u7d50\u69cb\u975e\u5e38\u654f\u611f\uff0c\u56e0\u6b64\u8abf\u6574\u9069\u7576\u7684 k \u503c\u6975\u70ba\u91cd\u8981\u3002\u53e6\u5916\u5927\u5bb6\u5f88\u5e38\u5c07 KNN \u8207 K-means \u6df7\u6dc6\uff0c\u96d6\u7136\u5169\u8005\u90fd\u6709 k \u503c\u8981\u8a2d\u5b9a\u4f46\u5176\u5be6\u5169\u8005\u7121\u4efb\u4f55\u95dc\u806f\u3002KNN \u7684 k \u662f\u8a2d\u5b9a\u9130\u5c45\u7684\u6578\u91cf\u63a1\u591a\u6578\u6c7a\u4f5c\u70ba\u8f38\u51fa\u7684\u4f9d\u64da\u3002\u800c K-means \u7684 k \u662f\u8a2d\u5b9a\u96c6\u7fa4\u7684\u985e\u5225\u4e2d\u5fc3\u9ede\u6578\u91cf\u3002","title":"KNN \u8207 k-means \u52ff\u6df7\u6dc6"},{"location":"10.KNN/#_2","text":"","title":"[\u7a0b\u5f0f\u5be6\u4f5c]"},{"location":"10.KNN/#knn_3","text":"\u63a1\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u505a\u70ba\u5206\u985e\u7bc4\u4f8b\uff0c\u4f7f\u7528 Sklearn \u5efa\u7acb k-nearest 
neighbors(KNN) \u6a21\u578b\u3002\u4ee5\u4e0b\u662f KNN \u5e38\u898b\u7684\u6a21\u578b\u64cd\u4f5c\u53c3\u6578\uff1a Parameters: - n_neighbors: \u8a2d\u5b9a\u9130\u5c45\u7684\u6578\u91cf(k)\uff0c\u9078\u53d6\u6700\u8fd1\u7684k\u500b\u9ede\uff0c\u9810\u8a2d\u70ba5\u3002 - algorithm: \u641c\u5c0b\u6578\u6f14\u7b97\u6cd5{'auto'\uff0c'ball_tree'\uff0c'kd_tree'\uff0c'brute'}\uff0c\u53ef\u9078\u3002 - metric: \u8a08\u7b97\u8ddd\u96e2\u7684\u65b9\u5f0f\uff0c\u9810\u8a2d\u70ba\u6b50\u5e7e\u91cc\u5f97\u8ddd\u96e2\u3002 Attributes: - classes_: \u53d6\u5f97\u985e\u5225\u9663\u5217\u3002 - effective_metric_: \u53d6\u5f97\u8a08\u7b97\u8ddd\u96e2\u7684\u516c\u5f0f\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 from sklearn.neighbors import KNeighborsClassifier # \u5efa\u7acb KNN \u6a21\u578b knnModel = KNeighborsClassifier ( n_neighbors = 3 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b knnModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = knnModel . predict ( X_train )","title":"KNN \u5206\u985e\u5668"},{"location":"10.KNN/#score","text":"\u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb score() \u76f4\u63a5\u8a08\u7b97\u6a21\u578b\u9810\u6e2c\u7684\u6e96\u78ba\u7387\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( '\u8a13\u7df4\u96c6: ' , knnModel . score ( X_train , y_train )) print ( '\u6e2c\u8a66\u96c6: ' , knnModel . 
score ( X_test , y_test )) \u57f7\u884c\u7d50\u679c\uff1a \u8a13\u7df4\u96c6: 0.9619047619047619 \u6e2c\u8a66\u96c6: 0.9555555555555556 \u6211\u5011\u53ef\u4ee5\u67e5\u770b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5728\u6e2c\u8a66\u96c6\u4e0a\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u4e0b\u5716\u4e2d\u5de6\u908a\u7684\u662f\u6e2c\u8a66\u96c6\u7684\u771f\u5be6\u5206\u985e\uff0c\u53f3\u908a\u7684\u662f\u6a21\u578b\u9810\u6e2c\u7684\u5206\u985e\u7d50\u679c\u3002\u5f9e\u5716\u4e2d\u53ef\u4ee5\u767c\u73fe\u85cd\u8272\u7684 Setosa \u5b8c\u6574\u7684\u88ab\u5206\u985e\u51fa\u4f86\uff0c\u800c\u6a58\u8272\u8207\u7da0\u8272\u7684\u5206\u4f48\u662f\u7dca\u5bc6\u76f8\u9023\u5728\u4ea4\u754c\u8655\u5206\u985e\u7684\u7d50\u679c\u6bd4\u8f03\u4e0d\u7a69\u5b9a\u3002\u4f46\u6700\u7d42\u9810\u6e2c\u7d50\u679c\u7d50\u679c\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u90fd\u6709\u767e\u5206\u4e4b95\u4ee5\u4e0a\u7684\u6e96\u78ba\u7387\u3002","title":"\u4f7f\u7528Score\u8a55\u4f30\u6a21\u578b"},{"location":"10.KNN/#knn_4","text":"KNN \u4e0d\u50c5\u80fd\u5920\u4f5c\u70ba\u5206\u985e\u5668\uff0c\u4e5f\u53ef\u4ee5\u505a\u8ff4\u6b78\u9023\u7e8c\u6027\u7684\u6578\u503c\u9810\u6e2c\u3002\u5176\u9810\u6e2c\u503c\u70bak\u500b\u6700\u8fd1\u9130\u5c45\u7684\u503c\u7684\u5e73\u5747\u503c\u3002 Parameters: - n_neighbors: \u8a2d\u5b9a\u9130\u5c45\u7684\u6578\u91cf(k)\uff0c\u9078\u53d6\u6700\u8fd1\u7684k\u500b\u9ede\uff0c\u9810\u8a2d\u70ba5\u3002 - algorithm: \u641c\u5c0b\u6578\u6f14\u7b97\u6cd5{'auto'\uff0c'ball_tree'\uff0c'kd_tree'\uff0c'brute'}\uff0c\u53ef\u9078\u3002 - metric: \u8a08\u7b97\u8ddd\u96e2\u7684\u65b9\u5f0f\uff0c\u9810\u8a2d\u70ba\u6b50\u5e7e\u91cc\u5f97\u8ddd\u96e2\u3002 Attributes: - classes_: \u53d6\u5f97\u985e\u5225\u9663\u5217\u3002 - effective_metric_: \u53d6\u5f97\u8a08\u7b97\u8ddd\u96e2\u7684\u516c\u5f0f\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: 
\u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 from sklearn.neighbors import KNeighborsRegressor # \u5efa\u7acb KNN \u6a21\u578b knnModel = KNeighborsRegressor ( n_neighbors = 3 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b knnModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = knnModel . predict ( x )","title":"KNN \u8ff4\u6b78\u5668"},{"location":"10.KNN/#_3","text":"Sklearn \u4e2d KNN \u8ff4\u6b78\u6a21\u578b\u7684 score \u51fd\u5f0f\u662f R2 score\uff0c\u53ef\u4f5c\u70ba\u6a21\u578b\u8a55\u4f30\u4f9d\u64da\uff0c\u5176\u6578\u503c\u8d8a\u63a5\u8fd1\u65bc1\u4ee3\u8868\u6a21\u578b\u8d8a\u4f73\u3002\u9664\u4e86 R2 score \u9084\u6709\u5176\u4ed6\u8a31\u591a\u8ff4\u6b78\u6a21\u578b\u7684\u8a55\u4f30\u65b9\u6cd5\uff0c\u4f8b\u5982\uff1a MSE\u3001MAE\u3001RMSE\u3002 from sklearn import metrics print ( 'R2 score: ' , knnModel . score ( x , y )) mse = metrics . mean_squared_error ( y , predicted ) print ( 'MSE score: ' , mse ) \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u6a21\u578b\u8a55\u4f30"},{"location":"11.SVM/","text":"[Day 11] \u6838\u6a21\u578b - \u652f\u6301\u5411\u91cf\u6a5f (SVM) \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 SVM \u5206\u985e\u5668 \u4f55\u8b02\u652f\u6301\u5411\u91cf\u6a5f? \u975e\u7dda\u6027\u8207\u7dda\u6027? 
\u591a\u5143\u5206\u985e\u652f\u6301\u5411\u91cf\u6a5f\u3002 SVR \u8ff4\u6b78\u5668 \u5b78\u7fd2 SVR \u65b9\u6cd5\u5982\u4f55\u8655\u7406\u9023\u7e8c\u6027\u8f38\u51fa\u3002 SVM \u5206\u985e\u5668\u8207 SVR \u8ff4\u6b78\u5668\u624b\u628a\u624b\u5be6\u4f5c \u85c9\u7531\u5716\u5f62\u5316\u7684\u908a\u754c\uff0c\u4f86\u4e86\u89e3\u4f7f\u7528\u4e0d\u540c\u7684 Kernel \u53ca\u4e0d\u540c\u53c3\u6578\u7684\u610f\u7fa9\u3002 \u67e5\u770b SVR \u65b9\u6cd5\u5728\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u548c\u975e\u7dda\u6027\u8ff4\u6b78\u8868\u73fe\u3002 \u7bc4\u4f8b\u7a0b\u5f0f SVM(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f SVR(Regression)\uff1a SVM \u5206\u985e\u5668 \u652f\u6301\u5411\u91cf\u6a5f (support vector machine, SVM) \u662f\u4e00\u500b\u57fa\u65bc\u7d71\u8a08\u5b78\u7fd2\u7684\u76e3\u7763\u5f0f\u6f14\u7b97\u6cd5\uff0c\u900f\u904e\u627e\u51fa\u4e00\u500b\u8d85\u5e73\u9762\uff0c\u4f7f\u4e4b\u5c07\u5169\u500b\u4e0d\u540c\u7684\u96c6\u5408\u5206\u958b\u3002\u4e00\u822c\u7684\u5206\u985e\u554f\u984c\u6211\u5011\u5c31\u662f\u8981\uff0c\u627e\u51fa\u5728\u4e0d\u540c\u7684\u8cc7\u6599\u985e\u5225\u4e2d\u7684\u5206\u9694\u7dda\u3002\u4f46\u5728\u4e00\u822c\u72c0\u6cc1\u4e0b\u9019\u500b\u5206\u9694\u7dda\u975e\u5e38\u8907\u96dc\u4e14\u6709\u5f88\u591a\u7a2e\u53ef\u80fd\u3002\u7136\u800c SVM \u5c31\u662f\u8981\u5728\u9019\u5f88\u591a\u7a2e\u7684\u53ef\u80fd\u7576\u4e2d\u627e\u51fa\u6700\u4f73\u7684\u89e3\u3002SVM \u6f14\u7b97\u6cd5\u7684\u7cbe\u795e\u5c31\u662f\u627e\u51fa\u4e00\u689d\u5206\u9694\u7dda\u4f7f\u6240\u6709\u5728\u908a\u754c\u4e0a\u7684\u9ede\u96e2\u5f97\u8d8a\u9060\u8d8a\u597d\uff0c\u4f7f\u6a21\u578b\u62b5\u6297\u96dc\u8a0a\u7684\u80fd\u529b\u66f4\u4f73\u3002 SVM \u53ef\u5206\u70ba\u4ee5\u4e0b\u5169\u7a2e\uff1a - \u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f - \u975e\u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f \u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f 
\u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f\u5c31\u662f\u5728\u4e0b\u5716\u7bc4\u4f8b\u7684\u4e8c\u7dad\u5716\u5f62\u4e2d\u627e\u51fa\u4e00\u689d\u7dda\uff0c\u76ee\u6a19\u8b93\u9019\u689d\u76f4\u7dda\u8207\u5169\u500b\u985e\u5225\u4e4b\u9593\u7684\u9593\u9694\u5bec\u5ea6\u8ddd\u96e2\u6700\u5927\u5316\u3002\u5176\u4e2d\u96e2\u5169\u689d\u865b\u7dda(\u9593\u9694\u8d85\u5e73\u9762)\u8ddd\u96e2\u6700\u8fd1\u7684\u9ede\uff0c\u5c31\u7a31\u70ba\u652f\u6301\u5411\u91cf (support vector)\u3002 \u7576\u7136\u73fe\u5be6\u751f\u6d3b\u4e2d\u7684\u8cc7\u6599\u5f80\u5f80\u7a0d\u5fae\u8907\u96dc\uff0c\u90a3\u5982\u679c\u4e0d\u662f\u7dda\u6027\u53ef\u5206\u96c6\u5408\u600e\u9ebc\u8fa6\u5462\uff1f\u6211\u5011\u53ef\u4ee5\u904b\u7528\u6838\u51fd\u6578(kernel function) \u5e6b\u6211\u5011\u9020\u51fa\u4e0d\u53ef\u5206\u7684\u5206\u5272\u5e73\u9762\u3002 \u975e\u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f \u9664\u4e86\u9032\u884c\u7dda\u6027\u5206\u985e\u4e4b\u5916 SVM \u9084\u53ef\u4ee5\u4f7f\u7528\u6838\u6280\u5de7\u6709\u6548\u5730\u9032\u884c\u975e\u7dda\u6027\u5206\u985e\uff0c\u5c07\u5176\u8f38\u5165\u7684\u8cc7\u6599\u6295\u5230\u66f4\u9ad8\u7dad\u5ea6\u7684\u7a7a\u9593\uff0c\u4e26\u5728\u9ad8\u7dad\u5ea6\u7684\u7a7a\u9593\u9032\u884c\u9ad8\u7dad\u5ea6\u7684\u5206\u985e\u6216\u964d\u7dad\u3002\u7c21\u55ae\u4f86\u8aaa\u900f\u904e\u591a\u7dad\u5ea6\u7684\u6295\u5f71\u6280\u5de7\uff0c\u5c07\u539f\u672c\u5728\u4e8c\u7dad\u7a7a\u9593\u4e2d\u4e0d\u53ef\u5206\u7684\u9ede\u5230\u4e86\u4e09\u7dad\u7a7a\u9593\u5c31\u53ef\u5206\u4e86\u3002\u4f46\u662f\u96a8\u8457\u8cc7\u6599\u91cf\u589e\u52a0\u5176\u904b\u7b97\u4e5f\u6703\u8b8a\u591a\uff0c\u76f8\u5c0d\u7684\u57f7\u884c\u901f\u5ea6\u5c31\u6703\u8b8a\u6162\u3002 \u5169\u500b\u975e\u7dda\u6027\u7684 Kernel\uff1a - Polynomial \u9ad8\u6b21\u65b9\u8f49\u63db - Radial Basis Function \u9ad8\u65af\u8f49\u63db \u591a\u5143\u5206\u985e\u652f\u6301\u5411\u91cf\u6a5f SVM 
\u6f14\u7b97\u6cd5\u6700\u521d\u662f\u70ba\u4e8c\u5143\u5206\u985e\u554f\u984c\u6240\u8a2d\u8a08\u7684\uff0c\u4f46\u662f\u73fe\u5be6\u751f\u6d3b\u4e2d\u7684\u4f8b\u5b50\u4e00\u5b9a\u4e0d\u53ea\u6709\u5169\u985e\u7684\u554f\u984c\u8981\u89e3\u6c7a\u3002\u4ed6\u7684\u89e3\u6c7a\u65b9\u5f0f\u8207 [Day 9 \u908f\u8f2f\u8ff4\u6b78] \u6240\u63d0\u5230\u7684\u591a\u5143\u5206\u985e\u908f\u8f2f\u8ff4\u6b78\u662f\u4e00\u6a23\u7684\u3002\u4e3b\u8981\u662f\u5c07\u4e00\u500b\u591a\u5143\u5206\u985e\u554f\u984c\u8f49\u63db\u70ba\u591a\u500b\u4e8c\u5143\u5206\u985e\u554f\u984c\u3002\u5e38\u898b\u65b9\u6cd5\u5305\u62ec one-vs-rest(OvR) \u548c many-vs-many(MvM) \u5169\u7a2e\u3002 one-vs-rest(OvR) \u5c07\u67d0\u500b\u985e\u5225\u7684\u6a23\u672c\u6b78\u70ba\u4e00\u985e\uff0c\u5176\u4ed6\u5269\u9918\u7684\u6a23\u672c\u6b78\u70ba\u53e6\u4e00\u985e many-vs-many(MvM) \u5728\u4efb\u610f\u5169\u985e\u6a23\u672c\u4e4b\u9593\u8a2d\u8a08\u4e00\u500b SVM \u8a73\u7d30\u4ecb\u7d39\u53ef\u4ee5\u53c3\u8003 [Day 9 \u908f\u8f2f\u8ff4\u6b78] SVR \u8ff4\u6b78\u5668 \u652f\u6301\u5411\u91cf\u6a5f\uff08SVM\uff09\u662f\u5c08\u9580\u8655\u7406\u5206\u985e\u7684\u554f\u984c\uff0c\u9084\u6709\u53e6\u4e00\u500b\u540d\u8a5e\u7a31\u70ba\u652f\u6301\u5411\u91cf\u8ff4\u6b78\uff08Support Vector Regression, SVR\uff09\u5c08\u9580\u8655\u7406\u8ff4\u6b78\u554f\u984c\u3002SVR \u662f SVM \u7684\u5ef6\u4f38\uff0c\u800c\u652f\u6301\u5411\u91cf\u8ff4\u6b78\u53ea\u8981 f(x) \u8207 y \u504f\u96e2\u7a0b\u5ea6\u4e0d\u8981\u592a\u5927\uff0c\u65e2\u53ef\u4ee5\u8a8d\u70ba\u9810\u6e2c\u6b63\u78ba\u3002\u5982\u4e0b\u5716\u4e2d\u7684\u8ff4\u6b78\u7bc4\u4f8b\uff0c\u5728\u7dda\u6027\u7684 SVR \u6a21\u578b\u4e2d\u6703\u5728\u5de6\u53f3\u52a0\u4e0a \ud835\udf00 \u4f5c\u70ba\u6a21\u578b\u5bb9\u5fcd\u7684\u5340\u9593\u3002\u56e0\u6b64\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u53ea\u6709\u5728\u865b\u7dda\u4ee5\u5916\u7684\u8aa4\u5dee\u624d\u6703\u88ab\u8a08\u7b97\u3002\u6b64\u5916 SVR 
\u4e5f\u63d0\u4f9b\u4e86\u7dda\u6027\u8207\u975e\u7dda\u6027\u7684\u6838\u6280\u5de7\uff0c\u5176\u4e2d\u5728\u975e\u7dda\u6027\u7684\u6a21\u578b\u4e2d\u53ef\u4ee5\u4f7f\u7528\u9ad8\u6b21\u65b9\u8f49\u63db\u6216\u662f\u9ad8\u65af\u8f49\u63db\u3002 [\u7a0b\u5f0f\u5be6\u4f5c] \u652f\u6301\u5411\u91cf\u6a5f (Support Vector Machine, SVM) \u6a21\u578b SVM \u80fd\u5920\u900f\u904e\u8d85\u53c3\u6578 C \u4f86\u9054\u5230 weight regularization \u4f86\u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\u3002\u9664\u4e86\u9019\u9ede\u6211\u5011\u9084\u80fd\u900f\u904e SVM \u7684 Kernel trick \u7684\u65b9\u5f0f\u5c07\u8cc7\u6599\u505a\u975e\u7dda\u6027\u8f49\u63db\uff0c\u5e38\u898b\u7684 kernel \u9664\u4e86 linear \u7dda\u6027\u4ee5\u5916\u9084\u6709\u5169\u4e86\u975e\u7dda\u6027\u7684 Polynomial \u9ad8\u6b21\u65b9\u8f49\u63db\u4ee5\u53ca Radial Basis Function \u9ad8\u65af\u8f49\u63db\u3002 \u56db\u7a2e\u4e0d\u540cSVC\u5206\u985e\u5668: 1. LinearSVC (\u7dda\u6027) 2. kernel='linear' (\u7dda\u6027) 3. kernel='poly' (\u975e\u7dda\u6027) 4. kernel='rbf' (\u975e\u7dda\u6027) Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 LinearSVC Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - max_iter: \u6700\u5927\u8fed\u4ee3\u6b21\u6578\uff0c\u9810\u8a2d1000\u3002 from sklearn import svm # \u5efa\u7acb linearSvc \u6a21\u578b linearSvcModel = svm . LinearSVC ( C = 1 , max_iter = 10000 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b linearSvcModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = linearSvcModel . predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = linearSvcModel . 
score ( train_reduced , y_train ) \u8a13\u7df4\u96c6 Accuracy: 0.96 kernel='linear' Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528\u7dda\u6027\u3002 from sklearn import svm # \u5efa\u7acb kernel='linear' \u6a21\u578b svcModel = svm . SVC ( kernel = 'linear' , C = 1 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b svcModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = svcModel . predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = svcModel . score ( train_reduced , y_train ) \u8a13\u7df4\u96c6 Accuracy: 0.97 kernel='poly' Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528 Polynomial \u9ad8\u6b21\u65b9\u8f49\u63db\u3002 - degree: \u589e\u52a0\u6a21\u578b\u8907\u96dc\u5ea6\uff0c3 \u4ee3\u8868\u8f49\u63db\u5230\u4e09\u6b21\u7a7a\u9593\u9032\u884c\u5206\u985e\u3002 - gamma: \u6578\u503c\u8d8a\u5927\u8d8a\u80fd\u505a\u8907\u96dc\u7684\u5206\u985e\u908a\u754c\u3002 from sklearn import svm # \u5efa\u7acb kernel='poly' \u6a21\u578b polyModel = svm . SVC ( kernel = 'poly' , degree = 3 , gamma = 'auto' , C = 1 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b polyModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = polyModel . predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = polyModel . 
score ( train_reduced , y_train ) \u8a13\u7df4\u96c6 Accuracy: 0.97 kernel='rbf' Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528 Radial Basis Function \u9ad8\u65af\u8f49\u63db\u3002 - gamma: \u6578\u503c\u8d8a\u5927\u8d8a\u80fd\u505a\u8907\u96dc\u7684\u5206\u985e\u908a\u754c from sklearn import svm # \u5efa\u7acb kernel='rbf' \u6a21\u578b rbfModel = svm . SVC ( kernel = 'rbf' , gamma = 0.7 , C = 1 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b rbfModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = rbfModel . predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = rbfModel . score ( train_reduced , y_train ) \u8a13\u7df4\u96c6 Accuracy: 0.97 \u6211\u5011\u85c9\u7531\u5716\u5f62\u5316\u7684\u908a\u754c\uff0c\u4f86\u4e86\u89e3\u4f7f\u7528\u4e0d\u540c\u7684 Kernel \u53ca\u4e0d\u540c\u53c3\u6578\u7684\u610f\u7fa9\u3002\u4ee5\u4e0b\u7bc4\u4f8b\u5c07\u539f\u5148 \u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u56db\u500b\u7279\u5fb5\u900f\u904e PCA \u964d\u6210\u4e8c\u7dad\uff0c\u4ee5\u5229\u6211\u5011\u505a\u8996\u89ba\u5316\u89c0\u5bdf\u3002\u900f\u904e\u56db\u7a2e\u4e0d\u540c\u7684 SVC \u5be6\u9a57\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u4e0d\u540c\u7684\u6838\u6280\u5de7\u6240\u9810\u6e2c\u51fa\u4f86\u7684\u6c7a\u7b56\u908a\u7dda\u90fd\u4e0d\u76e1\u76f8\u540c\u3002\u7136\u800c\u8d8a\u8907\u96dc\u7684\u6a21\u578b\u76f8\u5c0d\u7684\u908a\u754c\u5c31\u6703\u8b8a\u5f97\u8d8a\u626d\u66f2\uff0c\u56e0\u70ba\u975e\u7dda\u6027\u7684\u6a21\u578b\u80fd\u5920\u6709\u6bd4\u8f03\u597d\u7684\u64ec\u5408\u4f7f\u5f97\u932f\u8aa4\u7387\u964d\u4f4e\u3002 \u652f\u6301\u5411\u91cf\u8ff4\u6b78\uff08Support Vector Regression, SVR\uff09 \u6a21\u578b \u5728 Sklearn \u4e2d SVM \u63d0\u4f9b\u8ff4\u6b78\u7684\u6a21\u578b\u7a31\u4f5c SVR\u3002\u6b64\u5916 SVR 
\u8ff4\u6b78\u5668\u4e5f\u63d0\u4f9b\u4e86\u4e09\u7a2e\u4e0d\u540c\u7684\u6838\u51fd\u6578\uff0c\u5206\u5225\u6709\u4e00\u500b\u7dda\u6027\u4ee5\u53ca\u5169\u500b\u975e\u7dda\u6027\u7684\u6a21\u578b\u53ef\u4ee5\u547c\u53eb\u3002\u5728 SVR \u8ff4\u6b78\u7684\u5be6\u9a57\uff0c\u6211\u5011\u62ff\u4e00\u7d44\u975e\u7dda\u6027\u7684\u8cc7\u6599\u4f5c\u70ba\u4f8b\u5b50\u3002\u4e26\u67e5\u770b\u5728\u4e0d\u540c\u7684\u6838\u6280\u5de7\u4e0b\u6a21\u578b\u6240\u64ec\u5408\u7684\u6210\u6548\u70ba\u4f55\uff1f \u4e09\u7a2e\u4e0d\u540cSVR\u8ff4\u6b78\u5668: 1. kernel='linear' (\u7dda\u6027) 2. kernel='poly' (\u975e\u7dda\u6027) 3. kernel='rbf' (\u975e\u7dda\u6027) Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 kernel='linear' Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528\u7dda\u6027\u3002 from sklearn import svm # \u5efa\u7acb kernel='linear' \u6a21\u578b linearModel = svm . SVR ( C = 1 , kernel = 'linear' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b linearModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = linearModel . predict ( x_test ) \u8a13\u7df4\u96c6 MSE: 5.903802524650818 kernel='poly' Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528 Polynomial \u9ad8\u6b21\u65b9\u8f49\u63db\u3002 - degree: \u589e\u52a0\u6a21\u578b\u8907\u96dc\u5ea6\uff0c3 \u4ee3\u8868\u8f49\u63db\u5230\u4e09\u6b21\u7a7a\u9593\u9032\u884c\u5206\u985e\u3002 - gamma: \u6578\u503c\u8d8a\u5927\u8d8a\u80fd\u505a\u8907\u96dc\u7684\u9810\u6e2c\u3002 from sklearn import svm # \u5efa\u7acb kernel='poly' \u6a21\u578b polyModel = svm . 
SVR ( C = 6 , kernel = 'poly' , degree = 3 , gamma = 'auto' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b polyModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = polyModel . predict ( x_test ) \u8a13\u7df4\u96c6 MSE: 8.296270605383441 kernel='rbf' Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528 Radial Basis Function \u9ad8\u65af\u8f49\u63db\u3002 - gamma: \u6578\u503c\u8d8a\u5927\u8d8a\u80fd\u505a\u8907\u96dc\u7684\u5206\u985e\u908a\u754c\u3002 from sklearn import svm # \u5efa\u7acb kernel='rbf' \u6a21\u578b rbfModel = svm . SVR ( C = 6 , kernel = 'rbf' , gamma = 'auto' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b rbfModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = rbfModel . predict ( x_test ) \u8a13\u7df4\u96c6 MSE: 2.2551572190243157 \u9019\u88e1\u7684\u8ff4\u6b78\u6a21\u578b\u63a1\u7528\u975e\u7dda\u6027\u7684\u8cc7\u6599\u9032\u884c\u6578\u64da\u64ec\u5408\u7684\u5be6\u9a57\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7dda\u6027\u7684\u6838\u51fd\u6578\u7121\u6cd5\u6709\u6548\u7684\u9810\u6e2c\u6240\u6709\u6578\u64da\u9ede\u7684\u8da8\u52e2\u3002\u800c\u975e\u7dda\u6027\u7684\u6a21\u578b\u4e2d RBF \u7684\u6a21\u578b\u5c0d\u65bc\u6b64\u8cc7\u6599\u6709\u6bd4\u8f03\u597d\u7684\u9810\u6e2c\u7d50\u679c\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 11] \u6838\u6a21\u578b - \u652f\u6301\u5411\u91cf\u6a5f (SVM)"},{"location":"11.SVM/#day-11-svm","text":"","title":"[Day 11] \u6838\u6a21\u578b - \u652f\u6301\u5411\u91cf\u6a5f (SVM)"},{"location":"11.SVM/#_1","text":"SVM \u5206\u985e\u5668 \u4f55\u8b02\u652f\u6301\u5411\u91cf\u6a5f? \u975e\u7dda\u6027\u8207\u7dda\u6027? 
\u591a\u5143\u5206\u985e\u652f\u6301\u5411\u91cf\u6a5f\u3002 SVR \u8ff4\u6b78\u5668 \u5b78\u7fd2 SVR \u65b9\u6cd5\u5982\u4f55\u8655\u7406\u9023\u7e8c\u6027\u8f38\u51fa\u3002 SVM \u5206\u985e\u5668\u8207 SVR \u8ff4\u6b78\u5668\u624b\u628a\u624b\u5be6\u4f5c \u85c9\u7531\u5716\u5f62\u5316\u7684\u908a\u754c\uff0c\u4f86\u4e86\u89e3\u4f7f\u7528\u4e0d\u540c\u7684 Kernel \u53ca\u4e0d\u540c\u53c3\u6578\u7684\u610f\u7fa9\u3002 \u67e5\u770b SVR \u65b9\u6cd5\u5728\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u548c\u975e\u7dda\u6027\u8ff4\u6b78\u8868\u73fe\u3002 \u7bc4\u4f8b\u7a0b\u5f0f SVM(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f SVR(Regression)\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"11.SVM/#svm","text":"\u652f\u6301\u5411\u91cf\u6a5f (support vector machine, SVM) \u662f\u4e00\u500b\u57fa\u65bc\u7d71\u8a08\u5b78\u7fd2\u7684\u76e3\u7763\u5f0f\u6f14\u7b97\u6cd5\uff0c\u900f\u904e\u627e\u51fa\u4e00\u500b\u8d85\u5e73\u9762\uff0c\u4f7f\u4e4b\u5c07\u5169\u500b\u4e0d\u540c\u7684\u96c6\u5408\u5206\u958b\u3002\u4e00\u822c\u7684\u5206\u985e\u554f\u984c\u6211\u5011\u5c31\u662f\u8981\uff0c\u627e\u51fa\u5728\u4e0d\u540c\u7684\u8cc7\u6599\u985e\u5225\u4e2d\u7684\u5206\u9694\u7dda\u3002\u4f46\u5728\u4e00\u822c\u72c0\u6cc1\u4e0b\u9019\u500b\u5206\u9694\u7dda\u975e\u5e38\u8907\u96dc\u4e14\u6709\u5f88\u591a\u7a2e\u53ef\u80fd\u3002\u7136\u800c SVM \u5c31\u662f\u8981\u5728\u9019\u5f88\u591a\u7a2e\u7684\u53ef\u80fd\u7576\u4e2d\u627e\u51fa\u6700\u4f73\u7684\u89e3\u3002SVM \u6f14\u7b97\u6cd5\u7684\u7cbe\u795e\u5c31\u662f\u627e\u51fa\u4e00\u689d\u5206\u9694\u7dda\u4f7f\u6240\u6709\u5728\u908a\u754c\u4e0a\u7684\u9ede\u96e2\u5f97\u8d8a\u9060\u8d8a\u597d\uff0c\u4f7f\u6a21\u578b\u62b5\u6297\u96dc\u8a0a\u7684\u80fd\u529b\u66f4\u4f73\u3002 SVM \u53ef\u5206\u70ba\u4ee5\u4e0b\u5169\u7a2e\uff1a - \u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f - \u975e\u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f","title":"SVM 
\u5206\u985e\u5668"},{"location":"11.SVM/#_2","text":"\u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f\u5c31\u662f\u5728\u4e0b\u5716\u7bc4\u4f8b\u7684\u4e8c\u7dad\u5716\u5f62\u4e2d\u627e\u51fa\u4e00\u689d\u7dda\uff0c\u76ee\u6a19\u8b93\u9019\u689d\u76f4\u7dda\u8207\u5169\u500b\u985e\u5225\u4e4b\u9593\u7684\u9593\u9694\u5bec\u5ea6\u8ddd\u96e2\u6700\u5927\u5316\u3002\u5176\u4e2d\u96e2\u5169\u689d\u865b\u7dda(\u9593\u9694\u8d85\u5e73\u9762)\u8ddd\u96e2\u6700\u8fd1\u7684\u9ede\uff0c\u5c31\u7a31\u70ba\u652f\u6301\u5411\u91cf (support vector)\u3002 \u7576\u7136\u73fe\u5be6\u751f\u6d3b\u4e2d\u7684\u8cc7\u6599\u5f80\u5f80\u7a0d\u5fae\u8907\u96dc\uff0c\u90a3\u5982\u679c\u4e0d\u662f\u7dda\u6027\u53ef\u5206\u96c6\u5408\u600e\u9ebc\u8fa6\u5462\uff1f\u6211\u5011\u53ef\u4ee5\u904b\u7528\u6838\u51fd\u6578(kernel function) \u5e6b\u6211\u5011\u9020\u51fa\u4e0d\u53ef\u5206\u7684\u5206\u5272\u5e73\u9762\u3002","title":"\u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f"},{"location":"11.SVM/#_3","text":"\u9664\u4e86\u9032\u884c\u7dda\u6027\u5206\u985e\u4e4b\u5916 SVM \u9084\u53ef\u4ee5\u4f7f\u7528\u6838\u6280\u5de7\u6709\u6548\u5730\u9032\u884c\u975e\u7dda\u6027\u5206\u985e\uff0c\u5c07\u5176\u8f38\u5165\u7684\u8cc7\u6599\u6295\u5230\u66f4\u9ad8\u7dad\u5ea6\u7684\u7a7a\u9593\uff0c\u4e26\u5728\u9ad8\u7dad\u5ea6\u7684\u7a7a\u9593\u9032\u884c\u9ad8\u7dad\u5ea6\u7684\u5206\u985e\u6216\u964d\u7dad\u3002\u7c21\u55ae\u4f86\u8aaa\u900f\u904e\u591a\u7dad\u5ea6\u7684\u6295\u5f71\u6280\u5de7\uff0c\u5c07\u539f\u672c\u5728\u4e8c\u7dad\u7a7a\u9593\u4e2d\u4e0d\u53ef\u5206\u7684\u9ede\u5230\u4e86\u4e09\u7dad\u7a7a\u9593\u5c31\u53ef\u5206\u4e86\u3002\u4f46\u662f\u96a8\u8457\u8cc7\u6599\u91cf\u589e\u52a0\u5176\u904b\u7b97\u4e5f\u6703\u8b8a\u591a\uff0c\u76f8\u5c0d\u7684\u57f7\u884c\u901f\u5ea6\u5c31\u6703\u8b8a\u6162\u3002 \u5169\u500b\u975e\u7dda\u6027\u7684 Kernel\uff1a - Polynomial \u9ad8\u6b21\u65b9\u8f49\u63db - Radial Basis Function 
\u9ad8\u65af\u8f49\u63db","title":"\u975e\u7dda\u6027\u53ef\u5206\u652f\u6301\u5411\u91cf\u6a5f"},{"location":"11.SVM/#_4","text":"SVM \u6f14\u7b97\u6cd5\u6700\u521d\u662f\u70ba\u4e8c\u5143\u5206\u985e\u554f\u984c\u6240\u8a2d\u8a08\u7684\uff0c\u4f46\u662f\u73fe\u5be6\u751f\u6d3b\u4e2d\u7684\u4f8b\u5b50\u4e00\u5b9a\u4e0d\u53ea\u6709\u5169\u985e\u7684\u554f\u984c\u8981\u89e3\u6c7a\u3002\u4ed6\u7684\u89e3\u6c7a\u65b9\u5f0f\u8207 [Day 9 \u908f\u8f2f\u8ff4\u6b78] \u6240\u63d0\u5230\u7684\u591a\u5143\u5206\u985e\u908f\u8f2f\u8ff4\u6b78\u662f\u4e00\u6a23\u7684\u3002\u4e3b\u8981\u662f\u5c07\u4e00\u500b\u591a\u5143\u5206\u985e\u554f\u984c\u8f49\u63db\u70ba\u591a\u500b\u4e8c\u5143\u5206\u985e\u554f\u984c\u3002\u5e38\u898b\u65b9\u6cd5\u5305\u62ec one-vs-rest(OvR) \u548c many-vs-many(MvM) \u5169\u7a2e\u3002 one-vs-rest(OvR) \u5c07\u67d0\u500b\u985e\u5225\u7684\u6a23\u672c\u6b78\u70ba\u4e00\u985e\uff0c\u5176\u4ed6\u5269\u9918\u7684\u6a23\u672c\u6b78\u70ba\u53e6\u4e00\u985e many-vs-many(MvM) \u5728\u4efb\u610f\u5169\u985e\u6a23\u672c\u4e4b\u9593\u8a2d\u8a08\u4e00\u500b SVM \u8a73\u7d30\u4ecb\u7d39\u53ef\u4ee5\u53c3\u8003 [Day 9 \u908f\u8f2f\u8ff4\u6b78]","title":"\u591a\u5143\u5206\u985e\u652f\u6301\u5411\u91cf\u6a5f"},{"location":"11.SVM/#svr","text":"\u652f\u6301\u5411\u91cf\u6a5f\uff08SVM\uff09\u662f\u5c08\u9580\u8655\u7406\u5206\u985e\u7684\u554f\u984c\uff0c\u9084\u6709\u53e6\u4e00\u500b\u540d\u8a5e\u7a31\u70ba\u652f\u6301\u5411\u91cf\u8ff4\u6b78\uff08Support Vector Regression, SVR\uff09\u5c08\u9580\u8655\u7406\u8ff4\u6b78\u554f\u984c\u3002SVR \u662f SVM \u7684\u5ef6\u4f38\uff0c\u800c\u652f\u6301\u5411\u91cf\u8ff4\u6b78\u53ea\u8981 f(x) \u8207 y \u504f\u96e2\u7a0b\u5ea6\u4e0d\u8981\u592a\u5927\uff0c\u65e2\u53ef\u4ee5\u8a8d\u70ba\u9810\u6e2c\u6b63\u78ba\u3002\u5982\u4e0b\u5716\u4e2d\u7684\u8ff4\u6b78\u7bc4\u4f8b\uff0c\u5728\u7dda\u6027\u7684 SVR \u6a21\u578b\u4e2d\u6703\u5728\u5de6\u53f3\u52a0\u4e0a \ud835\udf00 
\u4f5c\u70ba\u6a21\u578b\u5bb9\u5fcd\u7684\u5340\u9593\u3002\u56e0\u6b64\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u53ea\u6709\u5728\u865b\u7dda\u4ee5\u5916\u7684\u8aa4\u5dee\u624d\u6703\u88ab\u8a08\u7b97\u3002\u6b64\u5916 SVR \u4e5f\u63d0\u4f9b\u4e86\u7dda\u6027\u8207\u975e\u7dda\u6027\u7684\u6838\u6280\u5de7\uff0c\u5176\u4e2d\u5728\u975e\u7dda\u6027\u7684\u6a21\u578b\u4e2d\u53ef\u4ee5\u4f7f\u7528\u9ad8\u6b21\u65b9\u8f49\u63db\u6216\u662f\u9ad8\u65af\u8f49\u63db\u3002","title":"SVR \u8ff4\u6b78\u5668"},{"location":"11.SVM/#_5","text":"","title":"[\u7a0b\u5f0f\u5be6\u4f5c]"},{"location":"11.SVM/#support-vector-machine-svm","text":"SVM \u80fd\u5920\u900f\u904e\u8d85\u53c3\u6578 C \u4f86\u9054\u5230 weight regularization \u4f86\u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\u3002\u9664\u4e86\u9019\u9ede\u6211\u5011\u9084\u80fd\u900f\u904e SVM \u7684 Kernel trick \u7684\u65b9\u5f0f\u5c07\u8cc7\u6599\u505a\u975e\u7dda\u6027\u8f49\u63db\uff0c\u5e38\u898b\u7684 kernel \u9664\u4e86 linear \u7dda\u6027\u4ee5\u5916\u9084\u6709\u5169\u4e86\u975e\u7dda\u6027\u7684 Polynomial \u9ad8\u6b21\u65b9\u8f49\u63db\u4ee5\u53ca Radial Basis Function \u9ad8\u65af\u8f49\u63db\u3002 \u56db\u7a2e\u4e0d\u540cSVC\u5206\u985e\u5668: 1. LinearSVC (\u7dda\u6027) 2. kernel='linear' (\u7dda\u6027) 3. kernel='poly' (\u975e\u7dda\u6027) 4. 
kernel='rbf' (\u975e\u7dda\u6027) Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002","title":"\u652f\u6301\u5411\u91cf\u6a5f (Support Vector Machine, SVM) \u6a21\u578b"},{"location":"11.SVM/#linearsvc","text":"Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - max_iter: \u6700\u5927\u8fed\u4ee3\u6b21\u6578\uff0c\u9810\u8a2d1000\u3002 from sklearn import svm # \u5efa\u7acb linearSvc \u6a21\u578b linearSvcModel = svm . LinearSVC ( C = 1 , max_iter = 10000 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b linearSvcModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = linearSvcModel . predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = linearSvcModel . score ( train_reduced , y_train ) \u8a13\u7df4\u96c6 Accuracy: 0.96","title":"LinearSVC"},{"location":"11.SVM/#kernellinear","text":"Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528\u7dda\u6027\u3002 from sklearn import svm # \u5efa\u7acb kernel='linear' \u6a21\u578b svcModel = svm . SVC ( kernel = 'linear' , C = 1 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b svcModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = svcModel . predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = svcModel . 
score ( train_reduced , y_train ) \u8a13\u7df4\u96c6 Accuracy: 0.97","title":"kernel='linear'"},{"location":"11.SVM/#kernelpoly","text":"Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528 Polynomial \u9ad8\u6b21\u65b9\u8f49\u63db\u3002 - degree: \u589e\u52a0\u6a21\u578b\u8907\u96dc\u5ea6\uff0c3 \u4ee3\u8868\u8f49\u63db\u5230\u4e09\u6b21\u7a7a\u9593\u9032\u884c\u5206\u985e\u3002 - gamma: \u6578\u503c\u8d8a\u5927\u8d8a\u80fd\u505a\u8907\u96dc\u7684\u5206\u985e\u908a\u754c\u3002 from sklearn import svm # \u5efa\u7acb kernel='poly' \u6a21\u578b polyModel = svm . SVC ( kernel = 'poly' , degree = 3 , gamma = 'auto' , C = 1 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b polyModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = polyModel . predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = polyModel . score ( train_reduced , y_train ) \u8a13\u7df4\u96c6 Accuracy: 0.97","title":"kernel='poly'"},{"location":"11.SVM/#kernelrbf","text":"Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528 Radial Basis Function \u9ad8\u65af\u8f49\u63db\u3002 - gamma: \u6578\u503c\u8d8a\u5927\u8d8a\u80fd\u505a\u8907\u96dc\u7684\u5206\u985e\u908a\u754c from sklearn import svm # \u5efa\u7acb kernel='rbf' \u6a21\u578b rbfModel = svm . SVC ( kernel = 'rbf' , gamma = 0.7 , C = 1 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b rbfModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = rbfModel . predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = rbfModel . 
score ( train_reduced , y_train ) \u8a13\u7df4\u96c6 Accuracy: 0.97 \u6211\u5011\u85c9\u7531\u5716\u5f62\u5316\u7684\u908a\u754c\uff0c\u4f86\u4e86\u89e3\u4f7f\u7528\u4e0d\u540c\u7684 Kernel \u53ca\u4e0d\u540c\u53c3\u6578\u7684\u610f\u7fa9\u3002\u4ee5\u4e0b\u7bc4\u4f8b\u5c07\u539f\u5148 \u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u56db\u500b\u7279\u5fb5\u900f\u904e PCA \u964d\u6210\u4e8c\u7dad\uff0c\u4ee5\u5229\u6211\u5011\u505a\u8996\u89ba\u5316\u89c0\u5bdf\u3002\u900f\u904e\u56db\u7a2e\u4e0d\u540c\u7684 SVC \u5be6\u9a57\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u4e0d\u540c\u7684\u6838\u6280\u5de7\u6240\u9810\u6e2c\u51fa\u4f86\u7684\u6c7a\u7b56\u908a\u7dda\u90fd\u4e0d\u76e1\u76f8\u540c\u3002\u7136\u800c\u8d8a\u8907\u96dc\u7684\u6a21\u578b\u76f8\u5c0d\u7684\u908a\u754c\u5c31\u6703\u8b8a\u5f97\u8d8a\u626d\u66f2\uff0c\u56e0\u70ba\u975e\u7dda\u6027\u7684\u6a21\u578b\u80fd\u5920\u6709\u6bd4\u8f03\u597d\u7684\u64ec\u5408\u4f7f\u5f97\u932f\u8aa4\u7387\u964d\u4f4e\u3002","title":"kernel='rbf'"},{"location":"11.SVM/#support-vector-regression-svr","text":"\u5728 Sklearn \u4e2d SVM \u63d0\u4f9b\u8ff4\u6b78\u7684\u6a21\u578b\u7a31\u4f5c SVR\u3002\u6b64\u5916 SVR \u8ff4\u6b78\u5668\u4e5f\u63d0\u4f9b\u4e86\u4e09\u7a2e\u4e0d\u540c\u7684\u6838\u51fd\u6578\uff0c\u5206\u5225\u6709\u4e00\u500b\u7dda\u6027\u4ee5\u53ca\u5169\u500b\u975e\u7dda\u6027\u7684\u6a21\u578b\u53ef\u4ee5\u547c\u53eb\u3002\u5728 SVR \u8ff4\u6b78\u7684\u5be6\u9a57\uff0c\u6211\u5011\u62ff\u4e00\u7d44\u975e\u7dda\u6027\u7684\u8cc7\u6599\u4f5c\u70ba\u4f8b\u5b50\u3002\u4e26\u67e5\u770b\u5728\u4e0d\u540c\u7684\u6838\u6280\u5de7\u4e0b\u6a21\u578b\u6240\u64ec\u5408\u7684\u6210\u6548\u70ba\u4f55\uff1f \u4e09\u7a2e\u4e0d\u540cSVR\u8ff4\u6b78\u5668: 1. kernel='linear' (\u7dda\u6027) 2. kernel='poly' (\u975e\u7dda\u6027) 3. 
kernel='rbf' (\u975e\u7dda\u6027) Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002","title":"\u652f\u6301\u5411\u91cf\u8ff4\u6b78\uff08Support Vector Regression, SVR\uff09 \u6a21\u578b"},{"location":"11.SVM/#kernellinear_1","text":"Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528\u7dda\u6027\u3002 from sklearn import svm # \u5efa\u7acb kernel='linear' \u6a21\u578b linearModel = svm . SVR ( C = 1 , kernel = 'linear' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b linearModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = linearModel . predict ( x_test ) \u8a13\u7df4\u96c6 MSE: 5.903802524650818","title":"kernel='linear'"},{"location":"11.SVM/#kernelpoly_1","text":"Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528 Polynomial \u9ad8\u6b21\u65b9\u8f49\u63db\u3002 - degree: \u589e\u52a0\u6a21\u578b\u8907\u96dc\u5ea6\uff0c3 \u4ee3\u8868\u8f49\u63db\u5230\u4e09\u6b21\u7a7a\u9593\u9032\u884c\u5206\u985e\u3002 - gamma: \u6578\u503c\u8d8a\u5927\u8d8a\u80fd\u505a\u8907\u96dc\u7684\u9810\u6e2c\u3002 from sklearn import svm # \u5efa\u7acb kernel='poly' \u6a21\u578b polyModel = svm . SVR ( C = 6 , kernel = 'poly' , degree = 3 , gamma = 'auto' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b polyModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = polyModel . 
predict ( x_test ) \u8a13\u7df4\u96c6 MSE: 8.296270605383441","title":"kernel='poly'"},{"location":"11.SVM/#kernelrbf_1","text":"Parameters: - C: \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\uff0c\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002 - kernel: \u6b64\u7bc4\u4f8b\u63a1\u7528 Radial Basis Function \u9ad8\u65af\u8f49\u63db\u3002 - gamma: \u6578\u503c\u8d8a\u5927\u8d8a\u80fd\u505a\u8907\u96dc\u7684\u5206\u985e\u908a\u754c\u3002 from sklearn import svm # \u5efa\u7acb kernel='rbf' \u6a21\u578b rbfModel = svm . SVR ( C = 6 , kernel = 'rbf' , gamma = 'auto' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b rbfModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = rbfModel . predict ( x_test ) \u8a13\u7df4\u96c6 MSE: 2.2551572190243157 \u9019\u88e1\u7684\u8ff4\u6b78\u6a21\u578b\u63a1\u7528\u975e\u7dda\u6027\u7684\u8cc7\u6599\u9032\u884c\u6578\u64da\u64ec\u5408\u7684\u5be6\u9a57\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7dda\u6027\u7684\u6838\u51fd\u6578\u7121\u6cd5\u6709\u6548\u7684\u9810\u6e2c\u6240\u6709\u6578\u64da\u9ede\u7684\u8da8\u52e2\u3002\u800c\u975e\u7dda\u6027\u7684\u6a21\u578b\u4e2d RBF \u7684\u6a21\u578b\u5c0d\u65bc\u6b64\u8cc7\u6599\u6709\u6bd4\u8f03\u597d\u7684\u9810\u6e2c\u7d50\u679c\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"kernel='rbf'"},{"location":"12.\u6c7a\u7b56\u6a39/","text":"[Day 12] \u6c7a\u7b56\u6a39 (Decision tree) \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u4ecb\u7d39 \u6c7a\u7b56\u6a39\u5982\u4f55\u751f\u6210\uff1f \u5982\u4f55\u8655\u7406\u5206\u985e\u554f\u984c\uff1f \u5982\u4f55\u8655\u7406\u8ff4\u6b78\u554f\u984c\uff1f \u5be6\u4f5c\u6c7a\u7b56\u6a39\u5206\u985e\u5668 \u89c0\u5bdf\u6c7a\u7b56\u6a39\u662f\u5982\u4f55\u751f\u6210\u7684\u3002 \u5be6\u4f5c\u6c7a\u7b56\u6a39\u8ff4\u6b78\u5668 
\u67e5\u770b\u6c7a\u7b56\u6a39\u65b9\u6cd5\u5728\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u548c\u975e\u7dda\u6027\u8ff4\u6b78\u8868\u73fe\u3002 \u7bc4\u4f8b\u7a0b\u5f0f \u6c7a\u7b56\u6a39(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f \u6c7a\u7b56\u6a39(Regression)\uff1a \u6c7a\u7b56\u6a39 \u6c7a\u7b56\u6a39\u6703\u6839\u64da\u8a13\u7df4\u8cc7\u6599\u7522\u751f\u4e00\u68f5\u6a39\uff0c\u4f9d\u64da\u8a13\u7df4\u51fa\u4f86\u7684\u898f\u5247\u4f86\u5c0d\u65b0\u6a23\u672c\u9032\u884c\u9810\u6e2c\u3002\u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u53ef\u4ee5\u4f7f\u7528\u4e0d\u540c\u7684\u65b9\u5f0f\u4f86\u8a55\u4f30\u5206\u679d\u7684\u597d\u58de(\u4e82\u5ea6)\uff0c\u4f8b\u5982\u50cf\u662f Information gain\u3001Gain ratio\u3001Gini index\u3002\u4f9d\u64da\u8a13\u7df4\u8cc7\u6599\u627e\u51fa\u5408\u9069\u7684\u898f\u5247\uff0c\u6700\u7d42\u751f\u6210\u4e00\u500b\u898f\u5247\u6a39\u4f86\u6c7a\u7b56\u6240\u6709\u4e8b\u60c5\uff0c\u5176\u76ee\u7684\u4f7f\u6bcf\u4e00\u500b\u6c7a\u7b56\u80fd\u5920\u4f7f\u8a0a\u606f\u589e\u76ca\u6700\u5927\u5316\u3002\u5c31\u597d\u6bd4\u6211\u5011\u8a55\u4f30\u4eca\u5929\u6bd4\u8cfd\u662f\u5426\u8209\u884c\uff0c\u5929\u6c23\u56e0\u5b50\u53ef\u80fd\u7ad9\u6bd4\u8f03\u5927\u7684\u56e0\u7d20\uff0c\u800c Co2 \u7684\u6fc3\u5ea6\u9ad8\u4f4e\u53ef\u80fd\u4f54\u7684\u56e0\u5b50\u7a0b\u5ea6\u8f03\u4f4e\u3002\u56e0\u6b64\u5728\u7b2c\u4e00\u5c64\u7684\u6c7a\u7b56\u4e2d\u4ee5\u5929\u6c23\u7684\u7279\u5fb5\u5148\u9032\u884c\u7b2c\u4e00\u6b21\u7684\u6c7a\u7b56\u5224\u65b7\u3002\u63a5\u8457\u7b2c\u4e8c\u5c64\u518d\u5f9e\u6240\u6709\u7279\u5fb5\u4e2d\u5c0b\u627e\u6700\u9069\u5408\u7684\u6c7a\u7b56\u56e0\u5b50\uff0c\u76f4\u5230\u8a2d\u5b9a\u7684\u6700\u5927\u6a39\u7684\u6df1\u5ea6\u5373\u505c\u6b62\u6a39\u7684\u751f\u9577\u3002 \u6c7a\u7b56\u6a39\u5982\u4f55\u751f\u6210\uff1f 
\u6c7a\u7b56\u6a39\u662f\u4ee5\u4e00\u500b\u8caa\u5a6a\u6cd5\u5247\u4f86\u6c7a\u5b9a\u6bcf\u4e00\u5c64\u8981\u554f\u4ec0\u9ebc\u554f\u984c\uff0c\u76ee\u6a19\u662f\u5206\u985e\u904e\u5f8c\u6bcf\u4e00\u7fa4\u80fd\u5920\u5f88\u660e\u986f\u7684\u77e5\u9053\u662f\u5c6c\u65bc\u54ea\u4e00\u7a2e\u985e\u5225\u3002\u5ef6\u7e8c\u4e0a\u9762\u7684\u4f8b\u5b50\uff0c\u4ee5\u5206\u985e\u554f\u984c\u4f86\u8aaa\u5047\u8a2d\u8981\u8a55\u4f30\u660e\u5929\u6bd4\u8cfd\u662f\u5426\u8209\u884c\u3002\u5728\u6a39\u7684\u7b2c\u4e00\u5c64\u7bc0\u9ede\u4e2d\u6211\u5011\u8981\u5f9e\u5df2\u77e5\u7684\u5169\u500b\u7279\u5fb5\u5206\u5225\u662f\u6eab\u5ea6\u8207\u7279\u5fb5\u9078\u4e00\u500b\u4f5c\u70ba\u8a72\u5c64\u7684\u6c7a\u7b56\u56e0\u5b50\u3002\u5047\u8a2d\u76ee\u524d\u8a13\u7df4\u96c6\u6709\u4e94\u7b46\u8cc7\u6599\uff0c\u5176\u4e2d\u6b63\u5e38\u8209\u884c\u7684\u6709\u5169\u7b46\u8cc7\u6599\uff0c\u53d6\u6d88\u8209\u884c\u7684\u6709\u4e09\u7b46\u8cc7\u6599\u3002\u5728\u6a39\u7684\u7d50\u69cb\u4e2d\u5de6\u5b50\u6a39\u70ba\u6c7a\u7b56\u6b63\u5e38\u53d6\u884c\uff0c\u800c\u53f3\u5b50\u6a39\u662f\u6c7a\u7b56\u53d6\u6d88\u8209\u884c\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7576\u7279\u5fb5\u70ba\u5929\u6c23\u7684\u6642\u5019\u53ef\u4ee5\u4e00\u5f88\u6e05\u695a\u7684\u5c07\u9019\u5169\u985e\u5225\u5b8c\u6574\u5206\u958b\uff0c\u56e0\u6b64\u6211\u5011\u6703\u5c07\u5929\u6c23\u4f5c\u70ba\u9019\u4e00\u5c64\u5224\u65b7\u7684\u56e0\u5b50\u3002\u9019\u5c31\u662f\u6c7a\u7b56\u6a39\u5728\u751f\u6210\u4e2d\u7684\u8caa\u5a6a\u6a5f\u5236\u3002\u7136\u800c\u8981\u5982\u4f55\u53bb\u5224\u65b7\u6bcf\u6b21\u6c7a\u7b56\u7684\u597d\u58de\uff0c\u5c31\u5fc5\u9808\u4f9d\u9760\u4e82\u5ea6\u7684\u8a55\u4f30\u6307\u6a19\u3002 \u6c7a\u7b56\u6a39\u7684\u6df7\u4e82\u8a55\u4f30\u6307\u6a19 
\u6211\u5011\u9700\u8981\u5ba2\u89c0\u7684\u6a19\u6e96\u4f86\u6c7a\u5b9a\u6c7a\u7b56\u6a39\u7684\u6bcf\u500b\u5206\u652f\uff0c\u56e0\u6b64\u6211\u5011\u9700\u8981\u6709\u4e00\u500b\u8a55\u65b7\u7684\u6307\u6a19\u4f86\u5354\u52a9\u6211\u5011\u6c7a\u7b56\u3002\u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u53ef\u4ee5\u4f7f\u7528\u4e0d\u540c\u7684\u6307\u6a19\u4f86\u8a55\u4f30\u5206\u679d\u7684\u597d\u58de\uff0c\u5e38\u898b\u7684\u6c7a\u7b56\u4e82\u5ea6\u8a55\u4f30\u6307\u6a19\u6709 Information gain\u3001Gain ratio\u3001Gini index\u3002\u6211\u5011\u76ee\u6a19\u662f\u5f9e\u8a13\u7df4\u8cc7\u6599\u4e2d\u627e\u51fa\u4e00\u5957\u6c7a\u7b56\u898f\u5247\uff0c\u8b93\u6bcf\u4e00\u500b\u6c7a\u7b56\u80fd\u5920\u4f7f\u8a0a\u606f\u589e\u76ca\u6700\u5927\u5316\u3002\u4ee5\u4e0a\u7684\u6307\u6a19\u90fd\u662f\u5728\u8861\u91cf\u4e00\u500b\u5e8f\u5217\u4e2d\u7684\u6df7\u4e82\u7a0b\u5ea6\uff0c\u5176\u6578\u503c\u8d8a\u9ad8\u4ee3\u8868\u8d8a\u6df7\u4e82\u3002\u7136\u800c\u5728 Sklearn \u5957\u4ef6\u4e2d\u9810\u8a2d\u4f7f\u7528 Gini\u3002 Information gain (\u8cc7\u8a0a\u7372\u5229) Gain ratio (\u5409\u5c3c\u7372\u5229) Gini index (\u5409\u5c3c\u4fc2\u6578) = Gini Impurity (\u5409\u5c3c\u4e0d\u7d14\u5ea6) \u8a55\u4f30\u5206\u5272\u8cc7\u8a0a\u91cf Information Gain \u900f\u904e\u5f9e\u8a13\u7df4\u8cc7\u6599\u627e\u51fa\u898f\u5247\uff0c\u8b93\u6bcf\u4e00\u500b\u6c7a\u7b56\u80fd\u5920\u4f7f\u8a0a\u606f\u589e\u76ca\u6700\u5927\u5316\u3002\u5176\u7b97\u6cd5\u4e3b\u8981\u662f\u8a08\u7b97\u71b5\uff0c\u56e0\u6b64\u7d93\u7531\u6c7a\u7b56\u6a39\u5206\u5272\u5f8c\u7684\u8cc7\u8a0a\u91cf\u8981\u8d8a\u5c0f\u8d8a\u597d\u3002\u800c Gini \u7684\u6578\u503c\u8d8a\u5927\u4ee3\u8868\u5e8f\u5217\u4e2d\u7684\u8cc7\u6599\u4e82\uff0c\u6578\u503c\u7686\u70ba 0~1 \u4e4b\u9593\uff0c\u5176\u4e2d 0 \u4ee3\u8868\u8a72\u7279\u5fb5\u5728\u5e8f\u5217\u4e2d\u662f\u5b8c\u7f8e\u7684\u5206\u985e\u3002\u5e38\u898b\u7684\u8cc7\u8a0a\u91cf\u8a55\u4f30\u65b9\u6cd5\u6709\u5169\u7a2e\uff1a\u8cc7\u8a0a\u7372\u5229 (Information Gain) 
\u4ee5\u53ca Gini \u4e0d\u7d14\u5ea6 (Gini Impurity)\u3002 \u71b5 (Entropy) \u71b5 (Entropy) \u662f\u8a08\u7b97 Information Gain \u7684\u4e00\u7a2e\u65b9\u6cd5\u3002\u5728\u4e86\u89e3 Information Gain \u4e4b\u524d\u8981\u5148\u4e86\u89e3\u71b5\u662f\u5982\u4f55\u88ab\u8a08\u7b97\u51fa\u4f86\u7684\u3002\u5176\u4e2d\u5728\u4e0b\u5716\u516c\u5f0f\u4e2d p \u4ee3\u8868\u662f\u7684\u6a5f\u7387\u3001q \u4ee3\u8868\u5426\u7684\u6a5f\u7387\u3002\u6211\u5011\u53ef\u4ee5\u5f9e\u5716\u4e2d\u7bc4\u4f8b\u5f88\u6e05\u695a\u5730\u77e5\u9053\u7576\u6240\u6709\u7684\u8cc7\u6599\u90fd\u88ab\u5206\u985e\u4e00\u81f4\u7684\u6642\u5019 Entropy \u5373\u70ba 0\uff0c\u7576\u8cc7\u6599\u5404\u6709\u4e00\u534a\u4e0d\u540c\u6642 Entropy \u5373\u70ba 1\u3002 Gini \u4e0d\u7d14\u5ea6 (Gini Impurity) Gini \u4e0d\u7d14\u5ea6\u662f\u53e6\u4e00\u7a2e\u4e82\u5ea6\u7684\u8861\u91cf\u65b9\u5f0f\uff0c\u5b83\u7684\u6578\u5b57\u8d8a\u5927\u4ee3\u8868\u5e8f\u5217\u4e2d\u7684\u8cc7\u6599\u8d8a\u6df7\u4e82\u3002\u516c\u5f0f\u5982\u4e0b\u6240\u793a\uff0c\u5176\u4e2d p \u4ee3\u8868\u662f\u7684\u6a5f\u7387\u3001q \u70ba\u4ee3\u8868\u5426\u7684\u6a5f\u7387\u3002\u6211\u5011\u53ef\u4ee5\u5f9e\u5716\u4e2d\u7bc4\u4f8b\u5f88\u6e05\u695a\u5730\u77e5\u9053\u7576\u6240\u6709\u7684\u8cc7\u6599\u90fd\u88ab\u5206\u985e\u4e00\u81f4\u7684\u6642\u5019\u6df7\u4e82\u7a0b\u5ea6\u5373\u70ba 0\uff0c\u7576\u8cc7\u6599\u5404\u6709\u4e00\u534a\u4e0d\u540c\u6642\u6df7\u4e82\u7a0b\u5ea6\u5373\u70ba 0.5\u3002 \u8ff4\u6b78\u6a39 
\u6c7a\u7b56\u6a39\u8ff4\u6b78\u65b9\u6cd5\u8207\u5206\u985e\u6709\u9ede\u985e\u4f3c\u5dee\u5225\u50c5\u5728\u65bc\u8a55\u4f30\u5206\u679d\u597d\u58de\u7684\u65b9\u5f0f\u4e0d\u540c\uff0c\u6211\u5011\u53c8\u53ef\u4ee5\u7a31\u4f5c\u8ff4\u6b78\u6a39\u3002\u7576\u6578\u64da\u96c6\u7684\u8f38\u51fa\u7232\u9023\u7e8c\u6027\u6578\u503c\u6642\uff0c\u8a72\u6a39\u7b97\u6cd5\u5c31\u662f\u4e00\u500b\u8ff4\u6b78\u6a39\u3002\u900f\u904e\u6a39\u7684\u5c55\u958b\uff0c\u4e26\u7528\u8449\u7bc0\u9ede\u7684\u5747\u503c\u4f5c\u7232\u9810\u6e2c\u503c\u3002\u5f9e\u6839\u7bc0\u9ede\u958b\u59cb\uff0c\u5c0d\u6a23\u672c\u7684\u67d0\u4e00\u7279\u5fb5\u9032\u884c\u6e2c\u8a66\u3002\u7d93\u904e\u8a55\u4f30\u5f8c\uff0c\u5c07\u6a23\u672c\u5206\u914d\u5230\u5176\u5b50\u7d50\u9ede\u3002\u6b64\u6642\u6bcf\u4e00\u500b\u5b50\u7bc0\u9ede\u5c0d\u61c9\u8457\u8a72\u7279\u5fb5\u7684\u4e00\u500b\u503c\u3002\u4f9d\u7167\u9019\u6a23\u65b9\u5f0f\u9032\u884c\uff0c\u76f4\u81f3\u5230\u9054\u8449\u7d50\u9ede\u3002\u6b64\u6642\u8aa4\u5dee\u503c\u8981\u6700\u5c0f\u5316\uff0c\u4e26\u4e14\u8d8a\u63a5\u8fd1\u96f6\u8d8a\u597d\u3002 \u8ff4\u6b78\u6a39\u7684\u751f\u9577\u904e\u7a0b\u5f88\u63a8\u85a6\u770b \u9019\u7bc7 \u6587\u7ae0 \u4ee5\u4e0b\u8209\u4e00\u500b\u4f8b\u5b50\u5047\u8a2d x \u662f\u8f38\u5165 y \u662f\u8f38\u51fa\uff0c\u6211\u5011\u53ef\u4ee5\u5728\u4e00\u500b\u5e73\u9762\u4e0a\u7e6a\u88fd\u51fa\u8cc7\u6599\u8207\u6b63\u78ba\u7b54\u6848\u9593\u7684\u5206\u4f48\u3002\u5047\u8a2d\u8ff4\u6b78\u6a39\u7684\u6700\u5927\u6df1\u5ea6\u8a2d\u5b9a\u5169\u5c64\u3002\u9996\u5148\u5728\u7b2c\u4e00\u5c64\u4e2d\u6703\u5c07\u6240\u6709\u7684\u8cc7\u6599\u5f9e\u4e2d\u9593\u5207\u4e00\u5200\u6b64\u65b7\u9ede\u70ba x=0.496 \u7576\u5927\u65bc\u8a2d\u5b9a\u7684\u503c\u7684\u6578\u64da\u9ede\u6703\u7e7c\u7e8c\u5f80\u53f3\u5b50\u6a39\u4e0b\u53bb\u5ef6\u4f38\uff0c\u53cd\u4e4b\u5c0f\u65bc 0.496 
\u7684\u8cc7\u6599\u9ede\u6703\u5f80\u5de6\u5b50\u6a39\u8d70\u3002\u6b64\u6642\u5c07\u6703\u5207\u51fa\u4e00\u500b\u5206\u652f\u51fa\u4f86\u4e26\u5f80\u4e0b\u64f4\u5c55\u4e26\u5f62\u6210\u7b2c\u4e8c\u5c64\u7684\u6c7a\u7b56\u5206\u652f\u3002\u4e00\u76f4\u4e0d\u65b7\u6301\u7e8c\u62d3\u5c55\u76f4\u5230\u8a2d\u5b9a\u7684\u6700\u5927\u6df1\u5ea6\u7d42\u6b62\uff0c\u6b64\u6642\u7684\u7bc0\u9ede\u5373\u70ba\u8449\u7bc0\u9ede\u4e5f\u5c31\u662f\u6700\u7d42\u7684\u6a21\u578b\u8f38\u51fa\u503c\u3002 \u6a39\u8d8a\u6df1\u6a21\u578b\u8d8a\u8907\u96dc \u5047\u8a2d\u6211\u5011\u751f\u6210\u4e00\u500b f(x) = 3x+15 + noise \u7684\u8cc7\u6599\uff0c\u5176\u4e2d noise \u70ba\u4e00\u500b 0~1 \u4e4b\u9593\u7684\u96a8\u6a5f\u6578\u3002\u5f9e\u4ee5\u4e0b\u7684\u6e2c\u8a66\u53ef\u4ee5\u770b\u51fa\u96a8\u8457\u6c7a\u7b56\u6a39\u6df1\u5ea6\u7684\u589e\u52a0\uff0c\u6c7a\u7b56\u6a39\u7684\u64ec\u5408\u80fd\u529b\u4e0d\u65b7\u4e0a\u5347\u3002\u6c7a\u7b56\u6a39\u5df2\u7d93\u4e0d\u50c5\u50c5\u64ec\u5408\u4e86\u6211\u5011\u7684\u7dda\u6027\u51fd\u5f0f 3x+15 \uff0c\u540c\u6642\u4e5f\u64ec\u5408\u4e86\u6211\u5011\u6dfb\u52a0\u7684\u566a\u97f3(noise)\u3002 \u8ff4\u6b78\u6a39\u8a72\u5982\u4f55\u9078\u64c7\u5207\u5272\u9ede? 
\u5728\u5206\u985e\u6a21\u578b\u4e2d\u6c7a\u7b56\u6a39\u662f\u4ee5\u4e82\u5ea6\u4f5c\u70ba\u6c7a\u7b56\u6a39\u751f\u6210\u6642\u5019\u7684\u8a55\u4f30\u6307\u6a19\u3002\u4f46\u662f\u8ff4\u6b78\u6a39\u900f\u904e\u662f MSE \u6216 MAE \u4f86\u8a55\u4f30\u6a21\u578b\uff0c\u4e26\u627e\u51fa\u8aa4\u5dee\u6700\u5c0f\u7684\u503c\u4f5c\u70ba\u6a39\u7684\u7279\u5fb5\u9078\u64c7\u8207\u5207\u5272\u9ede\u3002\u5176\u4e2d\u524d\u8005\u662f\u5747\u65b9\u5dee\uff0c\u5f8c\u8005\u662f\u548c\u5747\u503c\u4e4b\u5dee\u7684\u7d55\u5c0d\u503c\u4e4b\u548c\u3002 CART \u6c7a\u7b56\u6a39 \u5728 Sklearn \u5957\u4ef6\u4e2d\u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u662f\u63a1\u7528 CART (Classification and Regression Tree) \u6f14\u7b97\u6cd5\uff0c\u4e26\u4e14\u53ef\u4ee5\u88ab\u62ff\u4f86\u505a\u5206\u985e\u548c\u8ff4\u6b78\u7684\u9810\u6e2c\u3002\u5728\u6c7a\u7b56\u6a39\u7684\u6bcf\u4e00\u500b\u7bc0\u9ede\u4e0a\u90fd\u662f\u63a1\u7528\u4e8c\u5206\u6cd5\uff0c\u4e5f\u5c31\u662f\u6bcf\u4e00\u500b\u6c7a\u7b56\u7bc0\u9ede\u53ea\u5206\u679d\u51fa\u5169\u500b\u5b50\u7bc0\u9ede\u3002\u4e26\u4e14\u4e0d\u65b7\u5730\u5f80\u4e0b\u62d3\u5c55\uff0c\u76f4\u5230\u8a2d\u5b9a\u7684\u6700\u5927\u6df1\u5ea6\u70ba\u6b62\uff0c\u6b64\u6642\u6700\u5927\u6df1\u5ea6\u7684\u7bc0\u9ede\u7a31\u70ba\u8449\u7bc0\u9ede\u5373\u70ba\u6a21\u578b\u7684\u9810\u6e2c\u8f38\u51fa\u3002 \u6c7a\u7b56\u6a39\u6a21\u578b\u7684\u512a\u7f3a\u9ede 
\u5efa\u7acb\u6c7a\u7b56\u6a39\u7684\u904e\u7a0b\u5c31\u662f\u4e0d\u65b7\u7684\u5c0b\u627e\u7279\u5fb5\u9032\u884c\u6c7a\u7b56\uff0c\u900f\u904e\u9019\u4e9b\u6c7a\u7b56\u76e1\u91cf\u7684\u4f7f\u9019\u4e9b\u8cc7\u6599\u88ab\u5206\u70ba\u540c\u4e00\u500b\u985e\u5225\uff0c\u4e14\u8a66\u8457\u8b93\u6df7\u4e82\u7a0b\u5ea6\u8d8a\u5c0f\u8d8a\u597d\u3002\u5207\u8a18\u6a39\u7684\u6df1\u5ea6\u8d8a\u6df1\u4e0d\u4e00\u5b9a\u8d8a\u597d\uff0c\u4ed6\u53ef\u80fd\u6703\u9020\u6210\u904e\u5ea6\u64ec\u5408\u7684\u554f\u984c\u3002\u8a13\u7df4\u597d\u7684\u6a21\u578b\u6211\u5011\u80fd\u5920\u8996\u89ba\u5316\u6c7a\u7b56\u6a39\u7684\u7d50\u69cb\uff0c\u76f8\u5c0d\u7684\u53ef\u89e3\u91cb\u6027\u5c31\u8b8a\u9ad8\u3002\u6b64\u5916\u8207\u5176\u5b83\u7684ML\u6a21\u578b\u6bd4\u8f03\u8d77\u4f86\uff0c\u6c7a\u7b56\u6a39\u57f7\u884c\u901f\u5ea6\u662f\u5b83\u7684\u4e00\u5927\u512a\u52e2\u3002\u56e0\u70ba\u662f\u6a39\u72c0\u7d50\u69cb\uff0c\u56e0\u6b64\u5728\u9032\u884c\u6a5f\u5668\u5b78\u7fd2\u7684\u6642\u5019\u6bcf\u500b\u6c7a\u7b56\u968e\u6bb5\u90fd\u76f8\u7576\u7684\u660e\u78ba\u6e05\u695a\uff0c\u4e0d\u662f 0 \u5c31\u662f 1\u3002 \u6c7a\u7b56\u6a39\u7e3d\u7d50 \u6c7a\u7b56\u6a39\u900f\u904e\u6240\u6709\u7279\u5fb5\u8207\u5c0d\u61c9\u7684\u503c\u5c07\u8cc7\u6599\u5207\u5206\uff0c\u4f86\u627e\u51fa\u6700\u9069\u5408\u7684\u5206\u679d\u4e26\u7e7c\u7e8c\u5f80\u4e0b\u62d3\u5c55\u3002\u82e5\u6c7a\u7b56\u6a39\u6df1\u5ea6\u8d8a\u6df1\u5247\u6c7a\u7b56\u7684\u898f\u5247\u5c07\u8d8a\u8907\u96dc\uff0c\u6a21\u578b\u9810\u6e2c\u4e5f\u6703\u8d8a\u63a5\u8fd1\u771f\u5be6\u7b54\u6848\u3002\u4f46\u82e5\u8a13\u7df4\u96c6\u4e2d\u542b\u6709\u904e\u591a\u7684\u96dc\u8a0a\uff0c\u592a\u6df1\u7684\u6a39\u5c31\u6709\u53ef\u80fd\u7522\u751f\u904e\u64ec\u5408\u7684\u60c5\u5f62\u3002\u56e0\u6b64\u55ae\u4e00\u7684\u6c7a\u7b56\u6a39\u80af\u5b9a\u662f\u4e0d\u5920\u7528\u7684\uff0c\u6211\u5011\u53ef\u4ee5\u5229\u7528\u96c6\u6210\u5b78\u7fd2\u4e2d\u7684 Boosting 
\u67b6\u69cb\uff0c\u5c0d\u8ff4\u6b78\u6a39\u9032\u884c\u6539\u826f\u5347\u7d1a\u3002 [\u7a0b\u5f0f\u5be6\u4f5c] \u5206\u985e\u6c7a\u7b56\u6a39 \u4e00\u500b\u6c7a\u7b56\u6a39\u6703\u6839\u64da\u8a13\u7df4\u8cc7\u6599\u81ea\u52d5\u7522\u751f\u4e00\u68f5\u6a39\u3002\u6c7a\u7b56\u6a39\u6703\u6839\u64da\u8cc7\u6599\u7522\u751f\u5f88\u591a\u6a39\u72c0\u7684\u898f\u5247\uff0c\u6700\u7d42\u8a13\u7df4\u51fa\u4f86\u7684\u898f\u5247\u6703\u5c0d\u65b0\u6a23\u672c\u9032\u884c\u9810\u6e2c\u3002 Parameters: - criterion: \u4e82\u5ea6\u7684\u8a55\u4f30\u6a19\u6e96\uff0cgini/entropy\u3002\u9810\u8a2d\u70bagini\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\u3002 - splitter: \u7279\u5fb5\u5283\u5206\u9ede\u9078\u64c7\u6a19\u6e96\uff0cbest/random\u3002\u9810\u8a2d\u70babest\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\uff0csplitter=random \u624d\u6709\u7528\u3002 - min_samples_split: \u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u518d\u5206 - min_samples_leaf: \u5206\u5b8c\u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u5206 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 - get_depth: \u53d6\u5f97\u6a39\u7684\u6df1\u5ea6\u3002 from sklearn.tree import DecisionTreeClassifier # \u5efa\u7acb DecisionTreeClassifier \u6a21\u578b decisionTreeModel = DecisionTreeClassifier ( criterion = 'entropy' , max_depth = 6 , random_state = 42 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b decisionTreeModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = decisionTreeModel . 
predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = decisionTreeModel . score ( train_reduced , y_train ) \u6211\u5011\u900f\u904e\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u9032\u884c PCA \u964d\u7dad\u4e26\u8a13\u7df4\u4e00\u500b\u6c7a\u7b56\u6a39\u6a21\u578b\u3002\u900f\u904e\u7e6a\u88fd\u8a13\u7df4\u6c7a\u7b56\u908a\u754c\u53ef\u4ee5\u770b\u5230\uff0c\u5728\u4e0b\u5716\u53f3\u624b\u908a\u7684\u8a13\u7df4\u96c6\u5b8c\u6574\u5730\u5c07\u4e09\u500b\u985e\u5225\u5207\u5272\u958b\u4f86\u3002\u800c\u5728\u53f3\u908a\u7684\u6e2c\u8a66\u96c6\u4e2d\u50c5\u6709\u4e00\u7b46\u7d05\u8272\u6846\u8d77\u4f86\u7684\u8cc7\u6599\u9810\u6e2c\u932f\u8aa4\u3002 \u8ff4\u6b78\u6c7a\u7b56\u6a39 Parameters: - criterion: \u8a55\u4f30\u5207\u5272\u9ede\u6307\u6a19\uff0cmse/friedman_mse/mae\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\u3002 - splitter: \u7279\u5fb5\u5283\u5206\u9ede\u9078\u64c7\u6a19\u6e96\uff0cbest/random\u3002\u9810\u8a2d\u70babest\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\uff0csplitter=random \u624d\u6709\u7528\u3002 - min_samples_split: \u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u518d\u5206 - min_samples_leaf: \u5206\u5b8c\u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u5206 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - get_depth: \u53d6\u5f97\u6a39\u7684\u6df1\u5ea6\u3002 from sklearn.tree import DecisionTreeRegressor # \u5efa\u7acb DecisionTreeRegressor \u6a21\u578b decisionTreeModel = DecisionTreeRegressor ( criterion = 'mse' , max_depth = 4 , splitter = 'best' , random_state = 42 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b decisionTreeModel . 
fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = decisionTreeModel . predict ( x ) \u5728\u8ff4\u6b78\u6c7a\u7b56\u6a39\u4e2d\u6211\u5011\u4f7f\u7528\u4e86\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u8207\u975e\u7dda\u6027\u8ff4\u6b78\u5169\u7a2e\u8cc7\u6599\u96c6\u9032\u884c\u6578\u64da\u64ec\u5408\u5be6\u9a57\u3002\u5728\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u4e2d\u6211\u5011\u5c07\u6578\u64da\u9ede\u6dfb\u52a0\u4e00\u4e9b\u566a\u97f3\u8b93\u8cc7\u6599\u5206\u5e03\u5728\u659c\u76f4\u7dda\u4e0a\u3002\u5de6\u5716\u662f\u8ff4\u6b78\u6a39\u5728\u6700\u5927\u6df1\u5ea6\u70ba 4 \u7684\u8a13\u7df4\u7d50\u679c\uff0c\u53ef\u4ee5\u96b1\u7d04\u5730\u770b\u5230\u6a21\u578b\u6c7a\u7b56\u7684\u65b9\u5f0f\u5448\u73fe\u968e\u68af\u72c0\u614b\u3002\u5982\u679c\u6211\u5011\u5617\u8a66\u7684\u5c07\u6578\u7684\u6df1\u5ea6\u589e\u52a0\uff0c\u6a21\u578b\u76f8\u5c0d\u8907\u96dc\u56e0\u6b64\u53ef\u4ee5\u64ec\u5408\u5f97\u66f4\u597d\u3002\u800c\u53f3\u908a\u662f\u900f\u904e\u96a8\u6a5f\u7522\u751f\u7684\u975e\u7dda\u6027\u8cc7\u6599\u9032\u884c\u6a21\u578b\u8a13\u7df4\u3002\u5f9e\u8a13\u7df4\u7d50\u679c\u53ef\u4ee5\u767c\u73fe\u5728\u6700\u5927\u6df1\u5ea6\u70ba 4 \u7684\u6642\u5019\uff0c\u8a13\u7df4\u7d50\u679c\u5c31\u9084\u4e0d\u932f\u4e86\u3002\u5927\u5bb6\u53ef\u4ee5\u8a66\u770b\u770b\u8abf\u6574\u6a21\u578b\u7684\u6a39\u6700\u5927\u6df1\u5ea6\u4ee5\u53ca\u5176\u4ed6\u7684\u8d85\u53c3\u6578\u5c0d\u6a21\u578b\u8a13\u7df4\u7d50\u679c\u7684\u5f71\u97ff\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 12] \u6c7a\u7b56\u6a39 (Decision tree)"},{"location":"12.\u6c7a\u7b56\u6a39/#day-12-decision-tree","text":"","title":"[Day 12] \u6c7a\u7b56\u6a39 (Decision tree)"},{"location":"12.\u6c7a\u7b56\u6a39/#_1","text":"\u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u4ecb\u7d39 \u6c7a\u7b56\u6a39\u5982\u4f55\u751f\u6210\uff1f \u5982\u4f55\u8655\u7406\u5206\u985e\u554f\u984c\uff1f 
\u5982\u4f55\u8655\u7406\u8ff4\u6b78\u554f\u984c\uff1f \u5be6\u4f5c\u6c7a\u7b56\u6a39\u5206\u985e\u5668 \u89c0\u5bdf\u6c7a\u7b56\u6a39\u662f\u5982\u4f55\u751f\u6210\u7684\u3002 \u5be6\u4f5c\u6c7a\u7b56\u6a39\u8ff4\u6b78\u5668 \u67e5\u770b\u6c7a\u7b56\u6a39\u65b9\u6cd5\u5728\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u548c\u975e\u7dda\u6027\u8ff4\u6b78\u8868\u73fe\u3002 \u7bc4\u4f8b\u7a0b\u5f0f \u6c7a\u7b56\u6a39(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f \u6c7a\u7b56\u6a39(Regression)\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"12.\u6c7a\u7b56\u6a39/#_2","text":"\u6c7a\u7b56\u6a39\u6703\u6839\u64da\u8a13\u7df4\u8cc7\u6599\u7522\u751f\u4e00\u68f5\u6a39\uff0c\u4f9d\u64da\u8a13\u7df4\u51fa\u4f86\u7684\u898f\u5247\u4f86\u5c0d\u65b0\u6a23\u672c\u9032\u884c\u9810\u6e2c\u3002\u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u53ef\u4ee5\u4f7f\u7528\u4e0d\u540c\u7684\u65b9\u5f0f\u4f86\u8a55\u4f30\u5206\u679d\u7684\u597d\u58de(\u4e82\u5ea6)\uff0c\u4f8b\u5982\u50cf\u662f Information gain\u3001Gain ratio\u3001Gini index\u3002\u4f9d\u64da\u8a13\u7df4\u8cc7\u6599\u627e\u51fa\u5408\u9069\u7684\u898f\u5247\uff0c\u6700\u7d42\u751f\u6210\u4e00\u500b\u898f\u5247\u6a39\u4f86\u6c7a\u7b56\u6240\u6709\u4e8b\u60c5\uff0c\u5176\u76ee\u7684\u4f7f\u6bcf\u4e00\u500b\u6c7a\u7b56\u80fd\u5920\u4f7f\u8a0a\u606f\u589e\u76ca\u6700\u5927\u5316\u3002\u5c31\u597d\u6bd4\u6211\u5011\u8a55\u4f30\u4eca\u5929\u6bd4\u8cfd\u662f\u5426\u8209\u884c\uff0c\u5929\u6c23\u56e0\u5b50\u53ef\u80fd\u7ad9\u6bd4\u8f03\u5927\u7684\u56e0\u7d20\uff0c\u800c Co2 
\u7684\u6fc3\u5ea6\u9ad8\u4f4e\u53ef\u80fd\u4f54\u7684\u56e0\u5b50\u7a0b\u5ea6\u8f03\u4f4e\u3002\u56e0\u6b64\u5728\u7b2c\u4e00\u5c64\u7684\u6c7a\u7b56\u4e2d\u4ee5\u5929\u6c23\u7684\u7279\u5fb5\u5148\u9032\u884c\u7b2c\u4e00\u6b21\u7684\u6c7a\u7b56\u5224\u65b7\u3002\u63a5\u8457\u7b2c\u4e8c\u5c64\u518d\u5f9e\u6240\u6709\u7279\u5fb5\u4e2d\u5c0b\u627e\u6700\u9069\u5408\u7684\u6c7a\u7b56\u56e0\u5b50\uff0c\u76f4\u5230\u8a2d\u5b9a\u7684\u6700\u5927\u6a39\u7684\u6df1\u5ea6\u5373\u505c\u6b62\u6a39\u7684\u751f\u9577\u3002","title":"\u6c7a\u7b56\u6a39"},{"location":"12.\u6c7a\u7b56\u6a39/#_3","text":"\u6c7a\u7b56\u6a39\u662f\u4ee5\u4e00\u500b\u8caa\u5a6a\u6cd5\u5247\u4f86\u6c7a\u5b9a\u6bcf\u4e00\u5c64\u8981\u554f\u4ec0\u9ebc\u554f\u984c\uff0c\u76ee\u6a19\u662f\u5206\u985e\u904e\u5f8c\u6bcf\u4e00\u7fa4\u80fd\u5920\u5f88\u660e\u986f\u7684\u77e5\u9053\u662f\u5c6c\u65bc\u54ea\u4e00\u7a2e\u985e\u5225\u3002\u5ef6\u7e8c\u4e0a\u9762\u7684\u4f8b\u5b50\uff0c\u4ee5\u5206\u985e\u554f\u984c\u4f86\u8aaa\u5047\u8a2d\u8981\u8a55\u4f30\u660e\u5929\u6bd4\u8cfd\u662f\u5426\u8209\u884c\u3002\u5728\u6a39\u7684\u7b2c\u4e00\u5c64\u7bc0\u9ede\u4e2d\u6211\u5011\u8981\u5f9e\u5df2\u77e5\u7684\u5169\u500b\u7279\u5fb5\u5206\u5225\u662f\u6eab\u5ea6\u8207\u7279\u5fb5\u9078\u4e00\u500b\u4f5c\u70ba\u8a72\u5c64\u7684\u6c7a\u7b56\u56e0\u5b50\u3002\u5047\u8a2d\u76ee\u524d\u8a13\u7df4\u96c6\u6709\u4e94\u7b46\u8cc7\u6599\uff0c\u5176\u4e2d\u6b63\u5e38\u8209\u884c\u7684\u6709\u5169\u7b46\u8cc7\u6599\uff0c\u53d6\u6d88\u8209\u884c\u7684\u6709\u4e09\u7b46\u8cc7\u6599\u3002\u5728\u6a39\u7684\u7d50\u69cb\u4e2d\u5de6\u5b50\u6a39\u70ba\u6c7a\u7b56\u6b63\u5e38\u53d6\u884c\uff0c\u800c\u53f3\u5b50\u6a39\u662f\u6c7a\u7b56\u53d6\u6d88\u8209\u884c\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7576\u7279\u5fb5\u70ba\u5929\u6c23\u7684\u6642\u5019\u53ef\u4ee5\u4e00\u5f88\u6e05\u695a\u7684\u5c07\u9019\u5169\u985e\u5225\u5b8c\u6574\u5206\u958b\uff0c\u56e0\u6b64\u6211\u5011\u6703\u5c07\u5929\u6c23\u4f5c\u70ba\u9019\u4e00\u5c64\u5224\u65b7
\u7684\u56e0\u5b50\u3002\u9019\u5c31\u662f\u6c7a\u7b56\u6a39\u5728\u751f\u6210\u4e2d\u7684\u8caa\u5a6a\u6a5f\u5236\u3002\u7136\u800c\u8981\u5982\u4f55\u53bb\u5224\u65b7\u6bcf\u6b21\u6c7a\u7b56\u7684\u597d\u58de\uff0c\u5c31\u5fc5\u9808\u4f9d\u9760\u4e82\u5ea6\u7684\u8a55\u4f30\u6307\u6a19\u3002","title":"\u6c7a\u7b56\u6a39\u5982\u4f55\u751f\u6210\uff1f"},{"location":"12.\u6c7a\u7b56\u6a39/#_4","text":"\u6211\u5011\u9700\u8981\u5ba2\u89c0\u7684\u6a19\u6e96\u4f86\u6c7a\u5b9a\u6c7a\u7b56\u6a39\u7684\u6bcf\u500b\u5206\u652f\uff0c\u56e0\u6b64\u6211\u5011\u9700\u8981\u6709\u4e00\u500b\u8a55\u65b7\u7684\u6307\u6a19\u4f86\u5354\u52a9\u6211\u5011\u6c7a\u7b56\u3002\u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u53ef\u4ee5\u4f7f\u7528\u4e0d\u540c\u7684\u6307\u6a19\u4f86\u8a55\u4f30\u5206\u679d\u7684\u597d\u58de\uff0c\u5e38\u898b\u7684\u6c7a\u7b56\u4e82\u5ea6\u8a55\u4f30\u6307\u6a19\u6709 Information gain\u3001Gain ratio\u3001Gini index\u3002\u6211\u5011\u76ee\u6a19\u662f\u5f9e\u8a13\u7df4\u8cc7\u6599\u4e2d\u627e\u51fa\u4e00\u5957\u6c7a\u7b56\u898f\u5247\uff0c\u8b93\u6bcf\u4e00\u500b\u6c7a\u7b56\u80fd\u5920\u4f7f\u8a0a\u606f\u589e\u76ca\u6700\u5927\u5316\u3002\u4ee5\u4e0a\u7684\u6307\u6a19\u90fd\u662f\u5728\u8861\u91cf\u4e00\u500b\u5e8f\u5217\u4e2d\u7684\u6df7\u4e82\u7a0b\u5ea6\uff0c\u5176\u6578\u503c\u8d8a\u9ad8\u4ee3\u8868\u8d8a\u6df7\u4e82\u3002\u7136\u800c\u5728 Sklearn \u5957\u4ef6\u4e2d\u9810\u8a2d\u4f7f\u7528 Gini\u3002 Information gain (\u8cc7\u8a0a\u7372\u5229) Gain ratio (\u5409\u5c3c\u7372\u5229) Gini index (\u5409\u5c3c\u4fc2\u6578) = Gini Impurity (\u5409\u5c3c\u4e0d\u7d14\u5ea6)","title":"\u6c7a\u7b56\u6a39\u7684\u6df7\u4e82\u8a55\u4f30\u6307\u6a19"},{"location":"12.\u6c7a\u7b56\u6a39/#_5","text":"Information Gain 
\u900f\u904e\u5f9e\u8a13\u7df4\u8cc7\u6599\u627e\u51fa\u898f\u5247\uff0c\u8b93\u6bcf\u4e00\u500b\u6c7a\u7b56\u80fd\u5920\u4f7f\u8a0a\u606f\u589e\u76ca\u6700\u5927\u5316\u3002\u5176\u7b97\u6cd5\u4e3b\u8981\u662f\u8a08\u7b97\u71b5\uff0c\u56e0\u6b64\u7d93\u7531\u6c7a\u7b56\u6a39\u5206\u5272\u5f8c\u7684\u8cc7\u8a0a\u91cf\u8981\u8d8a\u5c0f\u8d8a\u597d\u3002\u800c Gini \u7684\u6578\u503c\u8d8a\u5927\u4ee3\u8868\u5e8f\u5217\u4e2d\u7684\u8cc7\u6599\u4e82\uff0c\u6578\u503c\u7686\u70ba 0~1 \u4e4b\u9593\uff0c\u5176\u4e2d 0 \u4ee3\u8868\u8a72\u7279\u5fb5\u5728\u5e8f\u5217\u4e2d\u662f\u5b8c\u7f8e\u7684\u5206\u985e\u3002\u5e38\u898b\u7684\u8cc7\u8a0a\u91cf\u8a55\u4f30\u65b9\u6cd5\u6709\u5169\u7a2e\uff1a\u8cc7\u8a0a\u7372\u5229 (Information Gain) \u4ee5\u53ca Gini \u4e0d\u7d14\u5ea6 (Gini Impurity)\u3002","title":"\u8a55\u4f30\u5206\u5272\u8cc7\u8a0a\u91cf"},{"location":"12.\u6c7a\u7b56\u6a39/#entropy","text":"\u71b5 (Entropy) \u662f\u8a08\u7b97 Information Gain \u7684\u4e00\u7a2e\u65b9\u6cd5\u3002\u5728\u4e86\u89e3 Information Gain \u4e4b\u524d\u8981\u5148\u4e86\u89e3\u71b5\u662f\u5982\u4f55\u88ab\u8a08\u7b97\u51fa\u4f86\u7684\u3002\u5176\u4e2d\u5728\u4e0b\u5716\u516c\u5f0f\u4e2d p \u4ee3\u8868\u662f\u7684\u6a5f\u7387\u3001q \u4ee3\u8868\u5426\u7684\u6a5f\u7387\u3002\u6211\u5011\u53ef\u4ee5\u5f9e\u5716\u4e2d\u7bc4\u4f8b\u5f88\u6e05\u695a\u5730\u77e5\u9053\u7576\u6240\u6709\u7684\u8cc7\u6599\u90fd\u88ab\u5206\u985e\u4e00\u81f4\u7684\u6642\u5019 Entropy \u5373\u70ba 0\uff0c\u7576\u8cc7\u6599\u5404\u6709\u4e00\u534a\u4e0d\u540c\u6642 Entropy \u5373\u70ba 1\u3002","title":"\u71b5 (Entropy)"},{"location":"12.\u6c7a\u7b56\u6a39/#gini-gini-impurity","text":"Gini \u4e0d\u7d14\u5ea6\u662f\u53e6\u4e00\u7a2e\u4e82\u5ea6\u7684\u8861\u91cf\u65b9\u5f0f\uff0c\u5b83\u7684\u6578\u5b57\u8d8a\u5927\u4ee3\u8868\u5e8f\u5217\u4e2d\u7684\u8cc7\u6599\u8d8a\u6df7\u4e82\u3002\u516c\u5f0f\u5982\u4e0b\u6240\u793a\uff0c\u5176\u4e2d p \u4ee3\u8868\u662f\u7684\u6a5f\u7387\u3001q 
\u70ba\u4ee3\u8868\u5426\u7684\u6a5f\u7387\u3002\u6211\u5011\u53ef\u4ee5\u5f9e\u5716\u4e2d\u7bc4\u4f8b\u5f88\u6e05\u695a\u5730\u77e5\u9053\u7576\u6240\u6709\u7684\u8cc7\u6599\u90fd\u88ab\u5206\u985e\u4e00\u81f4\u7684\u6642\u5019\u6df7\u4e82\u7a0b\u5ea6\u5373\u70ba 0\uff0c\u7576\u8cc7\u6599\u5404\u6709\u4e00\u534a\u4e0d\u540c\u6642\u6df7\u4e82\u7a0b\u5ea6\u5373\u70ba 0.5\u3002","title":"Gini \u4e0d\u7d14\u5ea6 (Gini Impurity)"},{"location":"12.\u6c7a\u7b56\u6a39/#_6","text":"\u6c7a\u7b56\u6a39\u8ff4\u6b78\u65b9\u6cd5\u8207\u5206\u985e\u6709\u9ede\u985e\u4f3c\u5dee\u5225\u50c5\u5728\u65bc\u8a55\u4f30\u5206\u679d\u597d\u58de\u7684\u65b9\u5f0f\u4e0d\u540c\uff0c\u6211\u5011\u53c8\u53ef\u4ee5\u7a31\u4f5c\u8ff4\u6b78\u6a39\u3002\u7576\u6578\u64da\u96c6\u7684\u8f38\u51fa\u7232\u9023\u7e8c\u6027\u6578\u503c\u6642\uff0c\u8a72\u6a39\u7b97\u6cd5\u5c31\u662f\u4e00\u500b\u8ff4\u6b78\u6a39\u3002\u900f\u904e\u6a39\u7684\u5c55\u958b\uff0c\u4e26\u7528\u8449\u7bc0\u9ede\u7684\u5747\u503c\u4f5c\u7232\u9810\u6e2c\u503c\u3002\u5f9e\u6839\u7bc0\u9ede\u958b\u59cb\uff0c\u5c0d\u6a23\u672c\u7684\u67d0\u4e00\u7279\u5fb5\u9032\u884c\u6e2c\u8a66\u3002\u7d93\u904e\u8a55\u4f30\u5f8c\uff0c\u5c07\u6a23\u672c\u5206\u914d\u5230\u5176\u5b50\u7d50\u9ede\u3002\u6b64\u6642\u6bcf\u4e00\u500b\u5b50\u7bc0\u9ede\u5c0d\u61c9\u8457\u8a72\u7279\u5fb5\u7684\u4e00\u500b\u503c\u3002\u4f9d\u7167\u9019\u6a23\u65b9\u5f0f\u9032\u884c\uff0c\u76f4\u81f3\u5230\u9054\u8449\u7d50\u9ede\u3002\u6b64\u6642\u8aa4\u5dee\u503c\u8981\u6700\u5c0f\u5316\uff0c\u4e26\u4e14\u8d8a\u63a5\u8fd1\u96f6\u8d8a\u597d\u3002 \u8ff4\u6b78\u6a39\u7684\u751f\u9577\u904e\u7a0b\u5f88\u63a8\u85a6\u770b \u9019\u7bc7 \u6587\u7ae0 \u4ee5\u4e0b\u8209\u4e00\u500b\u4f8b\u5b50\u5047\u8a2d x \u662f\u8f38\u5165 y 
\u662f\u8f38\u51fa\uff0c\u6211\u5011\u53ef\u4ee5\u5728\u4e00\u500b\u5e73\u9762\u4e0a\u7e6a\u88fd\u51fa\u8cc7\u6599\u8207\u6b63\u78ba\u7b54\u6848\u9593\u7684\u5206\u4f48\u3002\u5047\u8a2d\u8ff4\u6b78\u6a39\u7684\u6700\u5927\u6df1\u5ea6\u8a2d\u5b9a\u5169\u5c64\u3002\u9996\u5148\u5728\u7b2c\u4e00\u5c64\u4e2d\u6703\u5c07\u6240\u6709\u7684\u8cc7\u6599\u5f9e\u4e2d\u9593\u5207\u4e00\u5200\u6b64\u65b7\u9ede\u70ba x=0.496 \u7576\u5927\u65bc\u8a2d\u5b9a\u7684\u503c\u7684\u6578\u64da\u9ede\u6703\u7e7c\u7e8c\u5f80\u53f3\u5b50\u6a39\u4e0b\u53bb\u5ef6\u4f38\uff0c\u53cd\u4e4b\u5c0f\u65bc 0.496 \u7684\u8cc7\u6599\u9ede\u6703\u5f80\u5de6\u5b50\u6a39\u8d70\u3002\u6b64\u6642\u5c07\u6703\u5207\u51fa\u4e00\u500b\u5206\u652f\u51fa\u4f86\u4e26\u5f80\u4e0b\u64f4\u5c55\u4e26\u5f62\u6210\u7b2c\u4e8c\u5c64\u7684\u6c7a\u7b56\u5206\u652f\u3002\u4e00\u76f4\u4e0d\u65b7\u6301\u7e8c\u62d3\u5c55\u76f4\u5230\u8a2d\u5b9a\u7684\u6700\u5927\u6df1\u5ea6\u7d42\u6b62\uff0c\u6b64\u6642\u7684\u7bc0\u9ede\u5373\u70ba\u8449\u7bc0\u9ede\u4e5f\u5c31\u662f\u6700\u7d42\u7684\u6a21\u578b\u8f38\u51fa\u503c\u3002","title":"\u8ff4\u6b78\u6a39"},{"location":"12.\u6c7a\u7b56\u6a39/#_7","text":"\u5047\u8a2d\u6211\u5011\u751f\u6210\u4e00\u500b f(x) = 3x+15 + noise \u7684\u8cc7\u6599\uff0c\u5176\u4e2d noise \u70ba\u4e00\u500b 0~1 \u4e4b\u9593\u7684\u96a8\u6a5f\u6578\u3002\u5f9e\u4ee5\u4e0b\u7684\u6e2c\u8a66\u53ef\u4ee5\u770b\u51fa\u96a8\u8457\u6c7a\u7b56\u6a39\u6df1\u5ea6\u7684\u589e\u52a0\uff0c\u6c7a\u7b56\u6a39\u7684\u64ec\u5408\u80fd\u529b\u4e0d\u65b7\u4e0a\u5347\u3002\u6c7a\u7b56\u6a39\u5df2\u7d93\u4e0d\u50c5\u50c5\u64ec\u5408\u4e86\u6211\u5011\u7684\u7dda\u6027\u51fd\u5f0f 3x+15 
\uff0c\u540c\u6642\u4e5f\u64ec\u5408\u4e86\u6211\u5011\u6dfb\u52a0\u7684\u566a\u97f3(noise)\u3002","title":"\u6a39\u8d8a\u6df1\u6a21\u578b\u8d8a\u8907\u96dc"},{"location":"12.\u6c7a\u7b56\u6a39/#_8","text":"\u5728\u5206\u985e\u6a21\u578b\u4e2d\u6c7a\u7b56\u6a39\u662f\u4ee5\u4e82\u5ea6\u4f5c\u70ba\u6c7a\u7b56\u6a39\u751f\u6210\u6642\u5019\u7684\u8a55\u4f30\u6307\u6a19\u3002\u4f46\u662f\u8ff4\u6b78\u6a39\u900f\u904e\u662f MSE \u6216 MAE \u4f86\u8a55\u4f30\u6a21\u578b\uff0c\u4e26\u627e\u51fa\u8aa4\u5dee\u6700\u5c0f\u7684\u503c\u4f5c\u70ba\u6a39\u7684\u7279\u5fb5\u9078\u64c7\u8207\u5207\u5272\u9ede\u3002\u5176\u4e2d\u524d\u8005\u662f\u5747\u65b9\u5dee\uff0c\u5f8c\u8005\u662f\u548c\u5747\u503c\u4e4b\u5dee\u7684\u7d55\u5c0d\u503c\u4e4b\u548c\u3002","title":"\u8ff4\u6b78\u6a39\u8a72\u5982\u4f55\u9078\u64c7\u5207\u5272\u9ede?"},{"location":"12.\u6c7a\u7b56\u6a39/#cart","text":"\u5728 Sklearn \u5957\u4ef6\u4e2d\u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u662f\u63a1\u7528 CART (Classification and Regression Tree) \u6f14\u7b97\u6cd5\uff0c\u4e26\u4e14\u53ef\u4ee5\u88ab\u62ff\u4f86\u505a\u5206\u985e\u548c\u8ff4\u6b78\u7684\u9810\u6e2c\u3002\u5728\u6c7a\u7b56\u6a39\u7684\u6bcf\u4e00\u500b\u7bc0\u9ede\u4e0a\u90fd\u662f\u63a1\u7528\u4e8c\u5206\u6cd5\uff0c\u4e5f\u5c31\u662f\u6bcf\u4e00\u500b\u6c7a\u7b56\u7bc0\u9ede\u53ea\u5206\u679d\u51fa\u5169\u500b\u5b50\u7bc0\u9ede\u3002\u4e26\u4e14\u4e0d\u65b7\u5730\u5f80\u4e0b\u62d3\u5c55\uff0c\u76f4\u5230\u8a2d\u5b9a\u7684\u6700\u5927\u6df1\u5ea6\u70ba\u6b62\uff0c\u6b64\u6642\u6700\u5927\u6df1\u5ea6\u7684\u7bc0\u9ede\u7a31\u70ba\u8449\u7bc0\u9ede\u5373\u70ba\u6a21\u578b\u7684\u9810\u6e2c\u8f38\u51fa\u3002","title":"CART 
\u6c7a\u7b56\u6a39"},{"location":"12.\u6c7a\u7b56\u6a39/#_9","text":"\u5efa\u7acb\u6c7a\u7b56\u6a39\u7684\u904e\u7a0b\u5c31\u662f\u4e0d\u65b7\u7684\u5c0b\u627e\u7279\u5fb5\u9032\u884c\u6c7a\u7b56\uff0c\u900f\u904e\u9019\u4e9b\u6c7a\u7b56\u76e1\u91cf\u7684\u4f7f\u9019\u4e9b\u8cc7\u6599\u88ab\u5206\u70ba\u540c\u4e00\u500b\u985e\u5225\uff0c\u4e14\u8a66\u8457\u8b93\u6df7\u4e82\u7a0b\u5ea6\u8d8a\u5c0f\u8d8a\u597d\u3002\u5207\u8a18\u6a39\u7684\u6df1\u5ea6\u8d8a\u6df1\u4e0d\u4e00\u5b9a\u8d8a\u597d\uff0c\u4ed6\u53ef\u80fd\u6703\u9020\u6210\u904e\u5ea6\u64ec\u5408\u7684\u554f\u984c\u3002\u8a13\u7df4\u597d\u7684\u6a21\u578b\u6211\u5011\u80fd\u5920\u8996\u89ba\u5316\u6c7a\u7b56\u6a39\u7684\u7d50\u69cb\uff0c\u76f8\u5c0d\u7684\u53ef\u89e3\u91cb\u6027\u5c31\u8b8a\u9ad8\u3002\u6b64\u5916\u8207\u5176\u5b83\u7684ML\u6a21\u578b\u6bd4\u8f03\u8d77\u4f86\uff0c\u6c7a\u7b56\u6a39\u57f7\u884c\u901f\u5ea6\u662f\u5b83\u7684\u4e00\u5927\u512a\u52e2\u3002\u56e0\u70ba\u662f\u6a39\u72c0\u7d50\u69cb\uff0c\u56e0\u6b64\u5728\u9032\u884c\u6a5f\u5668\u5b78\u7fd2\u7684\u6642\u5019\u6bcf\u500b\u6c7a\u7b56\u968e\u6bb5\u90fd\u76f8\u7576\u7684\u660e\u78ba\u6e05\u695a\uff0c\u4e0d\u662f 0 \u5c31\u662f 
1\u3002","title":"\u6c7a\u7b56\u6a39\u6a21\u578b\u7684\u512a\u7f3a\u9ede"},{"location":"12.\u6c7a\u7b56\u6a39/#_10","text":"\u6c7a\u7b56\u6a39\u900f\u904e\u6240\u6709\u7279\u5fb5\u8207\u5c0d\u61c9\u7684\u503c\u5c07\u8cc7\u6599\u5207\u5206\uff0c\u4f86\u627e\u51fa\u6700\u9069\u5408\u7684\u5206\u679d\u4e26\u7e7c\u7e8c\u5f80\u4e0b\u62d3\u5c55\u3002\u82e5\u6c7a\u7b56\u6a39\u6df1\u5ea6\u8d8a\u6df1\u5247\u6c7a\u7b56\u7684\u898f\u5247\u5c07\u8d8a\u8907\u96dc\uff0c\u6a21\u578b\u9810\u6e2c\u4e5f\u6703\u8d8a\u63a5\u8fd1\u771f\u5be6\u7b54\u6848\u3002\u4f46\u82e5\u8a13\u7df4\u96c6\u4e2d\u542b\u6709\u904e\u591a\u7684\u96dc\u8a0a\uff0c\u592a\u6df1\u7684\u6a39\u5c31\u6709\u53ef\u80fd\u7522\u751f\u904e\u64ec\u5408\u7684\u60c5\u5f62\u3002\u56e0\u6b64\u55ae\u4e00\u7684\u6c7a\u7b56\u6a39\u80af\u5b9a\u662f\u4e0d\u5920\u7528\u7684\uff0c\u6211\u5011\u53ef\u4ee5\u5229\u7528\u96c6\u6210\u5b78\u7fd2\u4e2d\u7684 Boosting \u67b6\u69cb\uff0c\u5c0d\u8ff4\u6b78\u6a39\u9032\u884c\u6539\u826f\u5347\u7d1a\u3002","title":"\u6c7a\u7b56\u6a39\u7e3d\u7d50"},{"location":"12.\u6c7a\u7b56\u6a39/#_11","text":"","title":"[\u7a0b\u5f0f\u5be6\u4f5c]"},{"location":"12.\u6c7a\u7b56\u6a39/#_12","text":"\u4e00\u500b\u6c7a\u7b56\u6a39\u6703\u6839\u64da\u8a13\u7df4\u8cc7\u6599\u81ea\u52d5\u7522\u751f\u4e00\u68f5\u6a39\u3002\u6c7a\u7b56\u6a39\u6703\u6839\u64da\u8cc7\u6599\u7522\u751f\u5f88\u591a\u6a39\u72c0\u7684\u898f\u5247\uff0c\u6700\u7d42\u8a13\u7df4\u51fa\u4f86\u7684\u898f\u5247\u6703\u5c0d\u65b0\u6a23\u672c\u9032\u884c\u9810\u6e2c\u3002 Parameters: - criterion: \u4e82\u5ea6\u7684\u8a55\u4f30\u6a19\u6e96\uff0cgini/entropy\u3002\u9810\u8a2d\u70bagini\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\u3002 - splitter: \u7279\u5fb5\u5283\u5206\u9ede\u9078\u64c7\u6a19\u6e96\uff0cbest/random\u3002\u9810\u8a2d\u70babest\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\uff0csplitter=random \u624d\u6709\u7528\u3002 - min_samples_split: 
\u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u518d\u5206 - min_samples_leaf: \u5206\u5b8c\u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u5206 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 - get_depth: \u53d6\u5f97\u6a39\u7684\u6df1\u5ea6\u3002 from sklearn.tree import DecisionTreeClassifier # \u5efa\u7acb DecisionTreeClassifier \u6a21\u578b decisionTreeModel = DecisionTreeClassifier ( criterion = 'entropy' , max_depth = 6 , random_state = 42 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b decisionTreeModel . fit ( train_reduced , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = decisionTreeModel . predict ( train_reduced ) # \u8a08\u7b97\u6e96\u78ba\u7387 accuracy = decisionTreeModel . 
score ( train_reduced , y_train ) \u6211\u5011\u900f\u904e\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u9032\u884c PCA \u964d\u7dad\u4e26\u8a13\u7df4\u4e00\u500b\u6c7a\u7b56\u6a39\u6a21\u578b\u3002\u900f\u904e\u7e6a\u88fd\u8a13\u7df4\u6c7a\u7b56\u908a\u754c\u53ef\u4ee5\u770b\u5230\uff0c\u5728\u4e0b\u5716\u53f3\u624b\u908a\u7684\u8a13\u7df4\u96c6\u5b8c\u6574\u5730\u5c07\u4e09\u500b\u985e\u5225\u5207\u5272\u958b\u4f86\u3002\u800c\u5728\u53f3\u908a\u7684\u6e2c\u8a66\u96c6\u4e2d\u50c5\u6709\u4e00\u7b46\u7d05\u8272\u6846\u8d77\u4f86\u7684\u8cc7\u6599\u9810\u6e2c\u932f\u8aa4\u3002","title":"\u5206\u985e\u6c7a\u7b56\u6a39"},{"location":"12.\u6c7a\u7b56\u6a39/#_13","text":"Parameters: - criterion: \u8a55\u4f30\u5207\u5272\u9ede\u6307\u6a19\uff0cmse/friedman_mse/mae\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\u3002 - splitter: \u7279\u5fb5\u5283\u5206\u9ede\u9078\u64c7\u6a19\u6e96\uff0cbest/random\u3002\u9810\u8a2d\u70babest\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\uff0csplitter=random \u624d\u6709\u7528\u3002 - min_samples_split: \u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u518d\u5206 - min_samples_leaf: \u5206\u5b8c\u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u5206 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - get_depth: \u53d6\u5f97\u6a39\u7684\u6df1\u5ea6\u3002 from sklearn.tree import DecisionTreeRegressor # \u5efa\u7acb DecisionTreeRegressor \u6a21\u578b decisionTreeModel = DecisionTreeRegressor ( criterion = 'mse' , max_depth = 4 , splitter = 'best' , random_state = 42 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b decisionTreeModel . 
fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = decisionTreeModel . predict ( x ) \u5728\u8ff4\u6b78\u6c7a\u7b56\u6a39\u4e2d\u6211\u5011\u4f7f\u7528\u4e86\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u8207\u975e\u7dda\u6027\u8ff4\u6b78\u5169\u7a2e\u8cc7\u6599\u96c6\u9032\u884c\u6578\u64da\u64ec\u5408\u5be6\u9a57\u3002\u5728\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u4e2d\u6211\u5011\u5c07\u6578\u64da\u9ede\u6dfb\u52a0\u4e00\u4e9b\u566a\u97f3\u8b93\u8cc7\u6599\u5206\u5e03\u5728\u659c\u76f4\u7dda\u4e0a\u3002\u5de6\u5716\u662f\u8ff4\u6b78\u6a39\u5728\u6700\u5927\u6df1\u5ea6\u70ba 4 \u7684\u8a13\u7df4\u7d50\u679c\uff0c\u53ef\u4ee5\u96b1\u7d04\u5730\u770b\u5230\u6a21\u578b\u6c7a\u7b56\u7684\u65b9\u5f0f\u5448\u73fe\u968e\u68af\u72c0\u614b\u3002\u5982\u679c\u6211\u5011\u5617\u8a66\u7684\u5c07\u6578\u7684\u6df1\u5ea6\u589e\u52a0\uff0c\u6a21\u578b\u76f8\u5c0d\u8907\u96dc\u56e0\u6b64\u53ef\u4ee5\u64ec\u5408\u5f97\u66f4\u597d\u3002\u800c\u53f3\u908a\u662f\u900f\u904e\u96a8\u6a5f\u7522\u751f\u7684\u975e\u7dda\u6027\u8cc7\u6599\u9032\u884c\u6a21\u578b\u8a13\u7df4\u3002\u5f9e\u8a13\u7df4\u7d50\u679c\u53ef\u4ee5\u767c\u73fe\u5728\u6700\u5927\u6df1\u5ea6\u70ba 4 \u7684\u6642\u5019\uff0c\u8a13\u7df4\u7d50\u679c\u5c31\u9084\u4e0d\u932f\u4e86\u3002\u5927\u5bb6\u53ef\u4ee5\u8a66\u770b\u770b\u8abf\u6574\u6a21\u578b\u7684\u6a39\u6700\u5927\u6df1\u5ea6\u4ee5\u53ca\u5176\u4ed6\u7684\u8d85\u53c3\u6578\u5c0d\u6a21\u578b\u8a13\u7df4\u7d50\u679c\u7684\u5f71\u97ff\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u8ff4\u6b78\u6c7a\u7b56\u6a39"},{"location":"13.\u6574\u9ad4\u5b78\u7fd2/","text":"[Day 13] \u6574\u9ad4\u5b78\u7fd2 (Ensemble Learning) \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u4e86\u89e3\u6574\u9ad4\u5b78\u7fd2 \u4f55\u8b02\u6574\u9ad4\u5b78\u7fd2\uff1f \u4e09\u7a2e\u4e0d\u540c\u7684\u6574\u9ad4\u5b78\u7fd2 Bagging\u3001Boosting\u3001Stacking 
\u4f55\u8b02\u6574\u9ad4\u5b78\u7fd2\uff1f \u6574\u9ad4\u5b78\u7fd2 (Ensemble Learning) \u53c8\u7a31\u96c6\u6210\u5b78\u7fd2\u3001\u6574\u5408\u5b78\u7fd2\uff0c\u6307\u7684\u662f\u4ee5\u4e00\u500b\u7cfb\u7d71\u5316\u7684\u65b9\u5f0f\u5c07\u597d\u5e7e\u500b\u76e3\u7763\u5f0f\u5b78\u7fd2\u7684\u6a21\u578b\u7d50\u5408\u5728\u4e00\u8d77\uff0c\u76ee\u7684\u662f\u5e0c\u671b\u7d50\u5408\u773e\u591a\u7684\u6a21\u578b\u7522\u751f\u4e00\u500b\u66f4\u5f37\u5927\u7684\u6a21\u578b\u3002\u5728\u8a31\u591a\u79d1\u5b78\u7af6\u8cfd\u4e2d Ensemble Learning \u5728\u5be6\u52d9\u4e0a\u662f\u975e\u5e38\u6709\u6548\u7684\u63d0\u5347\u9810\u6e2c\u6e96\u78ba\u7387\u3002\u4f9d\u7167 Ensemble \u7684\u8655\u7406\u65b9\u5f0f\u7684\u4e0d\u540c\uff0c\u6211\u5011\u53ef\u4ee5\u5c07\u5b83\u5206\u70ba\u4e09\u985e\u3002\u7b2c\u4e00\u985e\u70ba Bagging\uff0c\u7b2c\u4e8c\u985e\u70ba Boosting\uff0c\u7b2c\u4e09\u985e\u70ba Stacking\u3002 Bagging: Random forest Boosting: AdaBoost Gradient Boosting XGBoost Stacking Bagging \u81ea\u52a9\u91cd\u62bd\u7e3d\u5408\u6cd5 Bagging \u6307\u7684\u662f\u6211\u5011\u628a\u8a13\u7df4\u8cc7\u6599\u91cd\u65b0\u63a1\u6a23\u7522\u751f\u4e0d\u540c\u7d44\u7684\u8a13\u7df4\u8cc7\u6599\uff0c\u5982\u4e0b\u5716\u70ba\u6574\u9ad4\u5b78\u7fd2 Bagging \u4e4b\u67b6\u69cb\u3002\u6839\u64da\u4e0d\u540c\u7d44\u7684\u8a13\u7df4\u8cc7\u6599\u5373\u4f7f\u6211\u5011\u7528\u540c\u4e00\u7a2e\u6f14\u7b97\u6cd5\u6211\u5011\u4e5f\u6703\u5f97\u5230\u4e0d\u4e00\u6a23\u7684\u6a21\u578b\uff0c\u4ed6\u7684\u6a39\u662f\u5404\u81ea\u7368\u7acb\u56e0\u6b64\u53ef\u4ee5\u5e73\u884c\u5316\u8655\u7406\u3002\u4ee3\u8868\u7684\u65b9\u6cd5\u662f\u96a8\u6a5f\u68ee\u6797\uff0c\u96a8\u6a5f\u68ee\u6797\u9664\u4e86 Bagging \u4e4b\u5916\uff0c\u9084\u6709\u53e6\u4e00\u500b\u96a8\u6a5f\u7684\u56e0\u7d20\u662f\u6bcf\u4e00\u68f5\u6a39\u90fd\u53ea\u80fd\u770b\u5230\u4e00\u90e8\u5206\u7684\u7279\u5fb5\uff0c\u9019\u4e9b\u7279\u5fb5\u662f\u7531\u96a8\u6a5f\u6c7a\u5b9a\u7684\u3002 Boosting \u63a8\u5347\u6cd5 Boosting 
\u5247\u6703\u6839\u64da\u6bcf\u4e00\u7b46\u8a13\u7df4\u8cc7\u6599\u7684\u96e3\u6216\u7c21\u55ae\u7d66\u4e88\u4e0d\u540c\u7684\u6b0a\u91cd\u3002\u5982\u4e0b\u5716\u6240\u793a\uff0c\u9996\u5148\u6211\u5011\u6703\u8a13\u7df4\u4e00\u500b base learner \u7136\u5f8c\u6839\u64da base learner \u9810\u6e2c\u7684\u7d50\u679c\u5c0d\u6216\u932f\u4f86\u5206\u8fa8\u8a72\u7b46\u8cc7\u6599\u662f\u4e00\u500b\u7c21\u55ae\u9084\u662f\u56f0\u96e3\u7684\u8cc7\u6599\u3002\u5c0d\u65bc\u96e3\u7684\u8cc7\u6599\u6211\u5011\u52a0\u5f37\u4ed6\u7684\u6b0a\u91cd\u518d\u8a13\u7df4\u4e00\u500b\u65b0\u7684\u5206\u985e\u5668\u6216\u8ff4\u6b78\u5668\u3002\u6211\u5011\u76ee\u6a19\u662f\u5e0c\u671b\u8a13\u7df4\u5f8c\uff0c\u65b0\u7684\u6a21\u578b\u5728\u9019\u4e9b\u96e3\u7684\u8cc7\u6599\u80fd\u5920\u8868\u73fe\u5f97\u66f4\u597d\u3002\u6211\u5011\u4e0d\u65b7\u91cd\u8907\u9019\u4e9b\u6b65\u9a5f\uff0c\u4e0d\u65b7\u5730\u52a0\u5165\u65b0\u7684 base learner\uff0c\u4e14\u65b0\u7684 base learner \u628a\u904e\u53bb\u8868\u73fe\u4e0d\u597d\u7684\u5730\u65b9\u6539\u5584\uff0c\u9019\u5c31\u662f Boosting \u7cbe\u795e\u3002\u56e0\u6b64 Boosting \u7684\u6bcf\u4e00\u68f5\u6a39\u662f\u4e92\u76f8\u6709\u95dc\u806f\u6027\u7684\u505a\u5b8c\u7b2c\u4e00\u68f5\u6a39\u53ef\u80fd\u9032\u884c\u4e0b\u4e00\u68f5\u6a39\u7684\u751f\u6210\u3002\u4ee3\u8868\u7684\u65b9\u6cd5\u6709 AdaBoost\u3001Gradient Boosting\uff0c\u5169\u7a2e\u90fd\u662f\u7522\u751f\u975e\u5e38\u591a\u68f5\u7684\u6a39\uff0c\u4f46\u662f\u6bcf\u4e00\u68f5\u90fd\u662f\u5f88\u7c21\u55ae\u7684\u6c7a\u7b56\u6a39\u3002Boosting \u76ee\u6a19\u662f\u5e0c\u671b\u65b0\u7684\u6a39\u53ef\u4ee5\u91dd\u5c0d\u820a\u7684\u6a39\u9810\u6e2c\u4e0d\u592a\u597d\u7684\u90e8\u5206\u505a\u4e00\u4e9b\u88dc\u5f37\u3002\u6240\u4ee5\u6700\u7d42\u6211\u5011\u8981\u628a\u9019\u9ebc\u591a\u7c21\u55ae\u7684\u6a39\u5408\u518d\u4e00\u8d77\u624d\u80fd\u7576\u6700\u5f8c\u7684\u9810\u6e2c\u3002 Stacking \u5806\u758a\u6cd5 \u4e0b\u5716\u70ba\u6574\u9ad4\u5b78\u7fd2 Stacking \u67b6\u69cb\u3002Stacking 
\u9996\u5148\u7522\u751f\u51fa m \u500b\u6a21\u578b\uff0c\u5f7c\u6b64\u9593\u4e26\u4e92\u76f8\u7121\u95dc\u9023\uff0c\u4f8b\u5982\u7b2c\u4e00\u500b\u6a21\u578b\u70ba logistic regression \u7b2c\u4e8c\u500b\u70ba\u6c7a\u7b56\u6a39\u3002\u8a13\u7df4\u5b8c m \u500b\u6a21\u578b\u5f8c\uff0c\u6211\u5011\u8981\u628a\u9019 m \u500b\u6a21\u578b\u5408\u4f75\u5728\u4e00\u8d77\u3002\u5408\u4f75\u7684\u65b9\u5f0f\u662f\u6211\u5011\u518d\u53e6\u5916\u8a13\u7df4\u4e00\u500b\u6a21\u578b\uff0c\u9019\u500b\u6a21\u578b\u628am\u500b\u6a21\u578b\u7684\u8f38\u51fa\u7576\u6210\u65b0\u7684\u6a21\u578b\u7684\u8f38\u5165\u56e0\u6b64\u6211\u5011\u6703\u6839\u64da\u9019m\u500b\u7279\u5fb5\u5229\u7528\u6574\u9ad4\u5b78\u7fd2\u5176\u4e2d\u7684\u6f14\u7b97\u6cd5\u4f86\u5b78\u7fd2\u4e00\u500b\u6a21\u578b\u4e26\u9810\u6e2c\u6700\u7d42\u7d50\u679c\u3002 \u5340\u57df\u5b78\u7fd2 (Patch Learning) \u5340\u57df\u5b78\u7fd2 ( Patch Learning , PL) \u65bc 2019 \u5e74\u7531\u7f8e\u570b\u5357\u52a0\u5dde\u5927\u5b78 Jerry M. Mendel \u8207 Dongrui Wu \u6240\u63d0\u51fa\u7684\u4e00\u7a2e\u6a5f\u5668\u5b78\u7fd2\u65b9\u6cd5\u3002\u6240\u8b02\u7684\u5340\u57df\u5b78\u7fd2\u662f\u80fd\u5920\u6709\u6548\u7684\u638c\u63e1\u8868\u73fe\u4e0d\u597d\u7684\u5340\u9593\uff0c\u7d93\u904e\u8a13\u7df4\u4e00\u500b\u5168\u57df\u7684\u6a21\u578b\u5f8c\u4e26\u4efb\u4e00\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u627e\u51fa\u9019\u4e9b\u8aa4\u5dee\u5927\u7684 Patch\uff0c\u900f\u904e\u591a\u500b\u65b7\u9ede\u7684\u5b78\u7fd2\u6211\u5011\u6703\u5f97\u5230 Global Model\u3001Patch1 Model\u3001Patch2 Model\u2026Patch(n) 
Model\u3002\u7136\u800c\u5728\u6a5f\u5668\u5b78\u7fd2\u4e2d\u6211\u5011\u6709\u5f88\u591a\u7a2e\u65b9\u6cd5\u53ef\u4ee5\u6539\u5584\u6211\u5011\u7684\u6a21\u578b\uff0c\u4f8b\u5982\u52a0\u6df1\u548c\u52a0\u5bec\u795e\u7d93\u7db2\u8def\u6216\u662f\u6dfb\u52a0\u4e00\u4e9b\u975e\u7dda\u6027\u7684\u6fc0\u52f5\u51fd\u6578\u4f86\u6700\u4f73\u5316\u6211\u5011\u7684\u6a21\u578b\u3002\u6216\u662f\u4f7f\u7528\u6574\u9ad4\u5b78\u7fd2\u7684\u65b9\u6cd5\u96c6\u5408\u8a31\u591a\u7b56\u7565\uff0c\u6700\u7d42\u5f62\u6210\u4e00\u500b\u5f37\u5b78\u7fd2\u5668\u4e26\u6539\u5584\u67d0\u4e9b\u5340\u57df\u7684\u5f31\u9ede\u3002\u4e0b\u5716\u70ba\u4e00\u500b\u7c21\u55ae\u7684\u5340\u57df\u5b78\u7fd2\u9810\u6e2c\u6d41\u7a0b\u5716\u3002\u5728\u4f7f\u7528\u5340\u57df\u5b78\u7fd2\u6a21\u578b\u524d\u6211\u5011\u8981\u627e\u51fa\u8a72\u6bb5\u8f38\u5165\u6240\u5c0d\u61c9\u7684 Patch\uff0c\u82e5\u8a72\u5340\u9593\u525b\u597d\u843d\u65bc\u6240\u5283\u5b9a\u7684\u7bc4\u570d\u5167\uff0c\u9019\u4e9b\u8f38\u5165\u5c31\u5c07\u6703\u5c0d\u61c9\u5230\u76f8\u5c0d\u61c9\u7684\u5340\u57df\u5b78\u7fd2\uff0c\u5426\u5247\u5c31\u6703\u4f7f\u7528\u5168\u57df\u6a21\u578b\u9032\u884c\u9810\u6e2c\u3002 \u5340\u57df\u5b78\u7fd2\u6a5f\u5236\u5305\u62ec\u4e09\u500b\u90e8\u5206\uff0c\u7c21\u8981\u8aaa\u660e\u5982\u4e0b: (1)\u4f7f\u7528\u6240\u6709\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u4e00\u500b\u5168\u57df\u6a21\u578b; (2)\u6311\u51fa\u5f71\u97ff\u932f\u8aa4\u7387\u8f03\u9ad8\u7684\u8cc7\u6599\uff0c\u518d\u653e\u5165\u500b\u5225\u7684 Patch \u6a21\u578b\u9032\u884c\u5340\u57df\u8a13\u7df4; (3)\u81ea\u8a13\u7df4\u8cc7\u6599\u4e2d\u53bb\u6389\u5df2\u7d93\u88ab\u5c40\u90e8\u6a21\u578b\u7528\u904e\u7684\u8cc7\u6599\uff0c\u518d\u4f7f\u7528\u5269\u4e0b\u7684\u6240\u6709\u8cc7\u6599\u66f4\u65b0\u5168\u57df\u6a21\u578b\u3002\u7576\u8f38\u5165\u8cc7\u6599\u9032\u4f86\u6642\uff0c\u9996\u5148\u5224\u65b7\u9019\u500b\u8f38\u5165\u662f\u4e0d\u662f\u5728\u525b\u525b\u8a18\u4e0b\u7684 Patch 
\u6a21\u578b\u4e2d\uff0c\u5982\u679c\u662f\u7684\u8a71\uff0c\u5c31\u57f7\u884c Patch \u6a21\u578b\u3002\u5982\u679c\u4e0d\u662f\u7684\u8a71\uff0c\u57f7\u884c\u66f4\u65b0\u5f8c\u7684\u5168\u57df\u6a21\u578b\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 13] \u6574\u9ad4\u5b78\u7fd2 (Ensemble Learning)"},{"location":"13.\u6574\u9ad4\u5b78\u7fd2/#day-13-ensemble-learning","text":"","title":"[Day 13] \u6574\u9ad4\u5b78\u7fd2 (Ensemble Learning)"},{"location":"13.\u6574\u9ad4\u5b78\u7fd2/#_1","text":"\u4e86\u89e3\u6574\u9ad4\u5b78\u7fd2 \u4f55\u8b02\u6574\u9ad4\u5b78\u7fd2\uff1f \u4e09\u7a2e\u4e0d\u540c\u7684\u6574\u9ad4\u5b78\u7fd2 Bagging\u3001Boosting\u3001Stacking","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"13.\u6574\u9ad4\u5b78\u7fd2/#_2","text":"\u6574\u9ad4\u5b78\u7fd2 (Ensemble Learning) \u53c8\u7a31\u96c6\u6210\u5b78\u7fd2\u3001\u6574\u5408\u5b78\u7fd2\uff0c\u6307\u7684\u662f\u4ee5\u4e00\u500b\u7cfb\u7d71\u5316\u7684\u65b9\u5f0f\u5c07\u597d\u5e7e\u500b\u76e3\u7763\u5f0f\u5b78\u7fd2\u7684\u6a21\u578b\u7d50\u5408\u5728\u4e00\u8d77\uff0c\u76ee\u7684\u662f\u5e0c\u671b\u7d50\u5408\u773e\u591a\u7684\u6a21\u578b\u7522\u751f\u4e00\u500b\u66f4\u5f37\u5927\u7684\u6a21\u578b\u3002\u5728\u8a31\u591a\u79d1\u5b78\u7af6\u8cfd\u4e2d Ensemble Learning \u5728\u5be6\u52d9\u4e0a\u662f\u975e\u5e38\u6709\u6548\u7684\u63d0\u5347\u9810\u6e2c\u6e96\u78ba\u7387\u3002\u4f9d\u7167 Ensemble \u7684\u8655\u7406\u65b9\u5f0f\u7684\u4e0d\u540c\uff0c\u6211\u5011\u53ef\u4ee5\u5c07\u5b83\u5206\u70ba\u4e09\u985e\u3002\u7b2c\u4e00\u985e\u70ba Bagging\uff0c\u7b2c\u4e8c\u985e\u70ba Boosting\uff0c\u7b2c\u4e09\u985e\u70ba Stacking\u3002 Bagging: Random forest Boosting: AdaBoost Gradient Boosting XGBoost Stacking","title":"\u4f55\u8b02\u6574\u9ad4\u5b78\u7fd2\uff1f"},{"location":"13.\u6574\u9ad4\u5b78\u7fd2/#bagging","text":"Bagging 
\u6307\u7684\u662f\u6211\u5011\u628a\u8a13\u7df4\u8cc7\u6599\u91cd\u65b0\u63a1\u6a23\u7522\u751f\u4e0d\u540c\u7d44\u7684\u8a13\u7df4\u8cc7\u6599\uff0c\u5982\u4e0b\u5716\u70ba\u6574\u9ad4\u5b78\u7fd2 Bagging \u4e4b\u67b6\u69cb\u3002\u6839\u64da\u4e0d\u540c\u7d44\u7684\u8a13\u7df4\u8cc7\u6599\u5373\u4f7f\u6211\u5011\u7528\u540c\u4e00\u7a2e\u6f14\u7b97\u6cd5\u6211\u5011\u4e5f\u6703\u5f97\u5230\u4e0d\u4e00\u6a23\u7684\u6a21\u578b\uff0c\u4ed6\u7684\u6a39\u662f\u5404\u81ea\u7368\u7acb\u56e0\u6b64\u53ef\u4ee5\u5e73\u884c\u5316\u8655\u7406\u3002\u4ee3\u8868\u7684\u65b9\u6cd5\u662f\u96a8\u6a5f\u68ee\u6797\uff0c\u96a8\u6a5f\u68ee\u6797\u9664\u4e86 Bagging \u4e4b\u5916\uff0c\u9084\u6709\u53e6\u4e00\u500b\u96a8\u6a5f\u7684\u56e0\u7d20\u662f\u6bcf\u4e00\u68f5\u6a39\u90fd\u53ea\u80fd\u770b\u5230\u4e00\u90e8\u5206\u7684\u7279\u5fb5\uff0c\u9019\u4e9b\u7279\u5fb5\u662f\u7531\u96a8\u6a5f\u6c7a\u5b9a\u7684\u3002","title":"Bagging \u81ea\u52a9\u91cd\u62bd\u7e3d\u5408\u6cd5"},{"location":"13.\u6574\u9ad4\u5b78\u7fd2/#boosting","text":"Boosting \u5247\u6703\u6839\u64da\u6bcf\u4e00\u7b46\u8a13\u7df4\u8cc7\u6599\u7684\u96e3\u6216\u7c21\u55ae\u7d66\u4e88\u4e0d\u540c\u7684\u6b0a\u91cd\u3002\u5982\u4e0b\u5716\u6240\u793a\uff0c\u9996\u5148\u6211\u5011\u6703\u8a13\u7df4\u4e00\u500b base learner \u7136\u5f8c\u6839\u64da base learner \u9810\u6e2c\u7684\u7d50\u679c\u5c0d\u6216\u932f\u4f86\u5206\u8fa8\u8a72\u7b46\u8cc7\u6599\u662f\u4e00\u500b\u7c21\u55ae\u9084\u662f\u56f0\u96e3\u7684\u8cc7\u6599\u3002\u5c0d\u65bc\u96e3\u7684\u8cc7\u6599\u6211\u5011\u52a0\u5f37\u4ed6\u7684\u6b0a\u91cd\u518d\u8a13\u7df4\u4e00\u500b\u65b0\u7684\u5206\u985e\u5668\u6216\u8ff4\u6b78\u5668\u3002\u6211\u5011\u76ee\u6a19\u662f\u5e0c\u671b\u8a13\u7df4\u5f8c\uff0c\u65b0\u7684\u6a21\u578b\u5728\u9019\u4e9b\u96e3\u7684\u8cc7\u6599\u80fd\u5920\u8868\u73fe\u5f97\u66f4\u597d\u3002\u6211\u5011\u4e0d\u65b7\u91cd\u8907\u9019\u4e9b\u6b65\u9a5f\uff0c\u4e0d\u65b7\u5730\u52a0\u5165\u65b0\u7684 base learner\uff0c\u4e14\u65b0\u7684 base 
learner \u628a\u904e\u53bb\u8868\u73fe\u4e0d\u597d\u7684\u5730\u65b9\u6539\u5584\uff0c\u9019\u5c31\u662f Boosting \u7cbe\u795e\u3002\u56e0\u6b64 Boosting \u7684\u6bcf\u4e00\u68f5\u6a39\u662f\u4e92\u76f8\u6709\u95dc\u806f\u6027\u7684\u505a\u5b8c\u7b2c\u4e00\u68f5\u6a39\u53ef\u80fd\u9032\u884c\u4e0b\u4e00\u68f5\u6a39\u7684\u751f\u6210\u3002\u4ee3\u8868\u7684\u65b9\u6cd5\u6709 AdaBoost\u3001Gradient Boosting\uff0c\u5169\u7a2e\u90fd\u662f\u7522\u751f\u975e\u5e38\u591a\u68f5\u7684\u6a39\uff0c\u4f46\u662f\u6bcf\u4e00\u68f5\u90fd\u662f\u5f88\u7c21\u55ae\u7684\u6c7a\u7b56\u6a39\u3002Boosting \u76ee\u6a19\u662f\u5e0c\u671b\u65b0\u7684\u6a39\u53ef\u4ee5\u91dd\u5c0d\u820a\u7684\u6a39\u9810\u6e2c\u4e0d\u592a\u597d\u7684\u90e8\u5206\u505a\u4e00\u4e9b\u88dc\u5f37\u3002\u6240\u4ee5\u6700\u7d42\u6211\u5011\u8981\u628a\u9019\u9ebc\u591a\u7c21\u55ae\u7684\u6a39\u5408\u518d\u4e00\u8d77\u624d\u80fd\u7576\u6700\u5f8c\u7684\u9810\u6e2c\u3002","title":"Boosting \u63a8\u5347\u6cd5"},{"location":"13.\u6574\u9ad4\u5b78\u7fd2/#stacking","text":"\u4e0b\u5716\u70ba\u6574\u9ad4\u5b78\u7fd2 Stacking \u67b6\u69cb\u3002Stacking \u9996\u5148\u7522\u751f\u51fa m \u500b\u6a21\u578b\uff0c\u5f7c\u6b64\u9593\u4e26\u4e92\u76f8\u7121\u95dc\u9023\uff0c\u4f8b\u5982\u7b2c\u4e00\u500b\u6a21\u578b\u70ba logistic regression \u7b2c\u4e8c\u500b\u70ba\u6c7a\u7b56\u6a39\u3002\u8a13\u7df4\u5b8c m \u500b\u6a21\u578b\u5f8c\uff0c\u6211\u5011\u8981\u628a\u9019 m \u500b\u6a21\u578b\u5408\u4f75\u5728\u4e00\u8d77\u3002\u5408\u4f75\u7684\u65b9\u5f0f\u662f\u6211\u5011\u518d\u53e6\u5916\u8a13\u7df4\u4e00\u500b\u6a21\u578b\uff0c\u9019\u500b\u6a21\u578b\u628am\u500b\u6a21\u578b\u7684\u8f38\u51fa\u7576\u6210\u65b0\u7684\u6a21\u578b\u7684\u8f38\u5165\u56e0\u6b64\u6211\u5011\u6703\u6839\u64da\u9019m\u500b\u7279\u5fb5\u5229\u7528\u6574\u9ad4\u5b78\u7fd2\u5176\u4e2d\u7684\u6f14\u7b97\u6cd5\u4f86\u5b78\u7fd2\u4e00\u500b\u6a21\u578b\u4e26\u9810\u6e2c\u6700\u7d42\u7d50\u679c\u3002","title":"Stacking 
\u5806\u758a\u6cd5"},{"location":"13.\u6574\u9ad4\u5b78\u7fd2/#patch-learning","text":"\u5340\u57df\u5b78\u7fd2 ( Patch Learning , PL) \u65bc 2019 \u5e74\u7531\u7f8e\u570b\u5357\u52a0\u5dde\u5927\u5b78 Jerry M. Mendel \u8207 Dongrui Wu \u6240\u63d0\u51fa\u7684\u4e00\u7a2e\u6a5f\u5668\u5b78\u7fd2\u65b9\u6cd5\u3002\u6240\u8b02\u7684\u5340\u57df\u5b78\u7fd2\u662f\u80fd\u5920\u6709\u6548\u7684\u638c\u63e1\u8868\u73fe\u4e0d\u597d\u7684\u5340\u9593\uff0c\u7d93\u904e\u8a13\u7df4\u4e00\u500b\u5168\u57df\u7684\u6a21\u578b\u5f8c\u4e26\u4efb\u4e00\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u627e\u51fa\u9019\u4e9b\u8aa4\u5dee\u5927\u7684 Patch\uff0c\u900f\u904e\u591a\u500b\u65b7\u9ede\u7684\u5b78\u7fd2\u6211\u5011\u6703\u5f97\u5230 Global Model\u3001Patch1 Model\u3001Patch2 Model\u2026Patch(n) Model\u3002\u7136\u800c\u5728\u6a5f\u5668\u5b78\u7fd2\u4e2d\u6211\u5011\u6709\u5f88\u591a\u7a2e\u65b9\u6cd5\u53ef\u4ee5\u6539\u5584\u6211\u5011\u7684\u6a21\u578b\uff0c\u4f8b\u5982\u52a0\u6df1\u548c\u52a0\u5bec\u795e\u7d93\u7db2\u8def\u6216\u662f\u6dfb\u52a0\u4e00\u4e9b\u975e\u7dda\u6027\u7684\u6fc0\u52f5\u51fd\u6578\u4f86\u6700\u4f73\u5316\u6211\u5011\u7684\u6a21\u578b\u3002\u6216\u662f\u4f7f\u7528\u6574\u9ad4\u5b78\u7fd2\u7684\u65b9\u6cd5\u96c6\u5408\u8a31\u591a\u7b56\u7565\uff0c\u6700\u7d42\u5f62\u6210\u4e00\u500b\u5f37\u5b78\u7fd2\u5668\u4e26\u6539\u5584\u67d0\u4e9b\u5340\u57df\u7684\u5f31\u9ede\u3002\u4e0b\u5716\u70ba\u4e00\u500b\u7c21\u55ae\u7684\u5340\u57df\u5b78\u7fd2\u9810\u6e2c\u6d41\u7a0b\u5716\u3002\u5728\u4f7f\u7528\u5340\u57df\u5b78\u7fd2\u6a21\u578b\u524d\u6211\u5011\u8981\u627e\u51fa\u8a72\u6bb5\u8f38\u5165\u6240\u5c0d\u61c9\u7684 Patch\uff0c\u82e5\u8a72\u5340\u9593\u525b\u597d\u843d\u65bc\u6240\u5283\u5b9a\u7684\u7bc4\u570d\u5167\uff0c\u9019\u4e9b\u8f38\u5165\u5c31\u5c07\u6703\u5c0d\u61c9\u5230\u76f8\u5c0d\u61c9\u7684\u5340\u57df\u5b78\u7fd2\uff0c\u5426\u5247\u5c31\u6703\u4f7f\u7528\u5168\u57df\u6a21\u578b\u9032\u884c\u9810\u6e2c\u3002 
\u5340\u57df\u5b78\u7fd2\u6a5f\u5236\u5305\u62ec\u4e09\u500b\u90e8\u5206\uff0c\u7c21\u8981\u8aaa\u660e\u5982\u4e0b: (1)\u4f7f\u7528\u6240\u6709\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u4e00\u500b\u5168\u57df\u6a21\u578b; (2)\u6311\u51fa\u5f71\u97ff\u932f\u8aa4\u7387\u8f03\u9ad8\u7684\u8cc7\u6599\uff0c\u518d\u653e\u5165\u500b\u5225\u7684 Patch \u6a21\u578b\u9032\u884c\u5340\u57df\u8a13\u7df4; (3)\u81ea\u8a13\u7df4\u8cc7\u6599\u4e2d\u53bb\u6389\u5df2\u7d93\u88ab\u5c40\u90e8\u6a21\u578b\u7528\u904e\u7684\u8cc7\u6599\uff0c\u518d\u4f7f\u7528\u5269\u4e0b\u7684\u6240\u6709\u8cc7\u6599\u66f4\u65b0\u5168\u57df\u6a21\u578b\u3002\u7576\u8f38\u5165\u8cc7\u6599\u9032\u4f86\u6642\uff0c\u9996\u5148\u5224\u65b7\u9019\u500b\u8f38\u5165\u662f\u4e0d\u662f\u5728\u525b\u525b\u8a18\u4e0b\u7684 Patch \u6a21\u578b\u4e2d\uff0c\u5982\u679c\u662f\u7684\u8a71\uff0c\u5c31\u57f7\u884c Patch \u6a21\u578b\u3002\u5982\u679c\u4e0d\u662f\u7684\u8a71\uff0c\u57f7\u884c\u66f4\u65b0\u5f8c\u7684\u5168\u57df\u6a21\u578b\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u5340\u57df\u5b78\u7fd2 (Patch Learning)"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/","text":"[Day 14] \u591a\u68f5\u6c7a\u7b56\u6a39\u66f4\u53b2\u5bb3\uff1a\u96a8\u6a5f\u68ee\u6797 (Random forest) \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u96a8\u6a5f\u68ee\u6797\u4ecb\u7d39 \u96a8\u6a5f\u68ee\u6797\u7684\u6a39\u662f\u5982\u4f55\u751f\u6210\uff1f\u96a8\u6a5f\u68ee\u6797\u7684\u512a\u9ede\uff1f \u96a8\u6a5f\u68ee\u6797\u5982\u4f55\u8655\u7406\u5206\u985e\u554f\u984c\uff1f \u96a8\u6a5f\u68ee\u6797\u5982\u4f55\u8655\u7406\u8ff4\u6b78\u554f\u984c\uff1f \u5be6\u4f5c\u96a8\u6a5f\u68ee\u6797\u5206\u985e\u5668 \u6bd4\u8f03\u96a8\u6a5f\u68ee\u6797\u8207\u6c7a\u7b56\u6a39\u5169\u8005\u5dee\u5225\u3002 \u7bc4\u4f8b\u7a0b\u5f0f \u96a8\u6a5f\u68ee\u6797(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f \u96a8\u6a5f\u68ee\u6797(Regression)\uff1a 
\u96a8\u6a5f\u68ee\u6797 \u96a8\u6a5f\u68ee\u6797\u5176\u5be6\u5c31\u662f\u9032\u968e\u7248\u7684\u6c7a\u7b56\u6a39\uff0c\u6240\u8b02\u7684\u68ee\u6797\u5c31\u662f\u7531\u5f88\u591a\u68f5\u6c7a\u7b56\u6a39\u6240\u7d44\u6210\u3002\u96a8\u6a5f\u68ee\u6797\u662f\u4f7f\u7528 Bagging \u52a0\u4e0a\u96a8\u6a5f\u7279\u5fb5\u63a1\u6a23\u7684\u65b9\u6cd5\u6240\u7522\u751f\u51fa\u4f86\u7684\u6574\u9ad4\u5b78\u7fd2\u6f14\u7b97\u6cd5\u3002\u9084\u8a18\u5f97\u5728\u524d\u5e7e\u5929\u7684\u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u4e2d\uff0c\u7576\u6a21\u578b\u7684\u6a39\u6700\u5927\u6df1\u5ea6\u8a2d\u5b9a\u592a\u5927\u7684\u8a71\u5bb9\u6613\u8b93\u6a21\u578b\u904e\u64ec\u5408\u3002\u56e0\u6b64\u96a8\u6a5f\u68ee\u6797\u85c9\u7531\u591a\u68f5\u4e0d\u540c\u6a39\u7684\u6982\u5ff5\u6240\u7d44\u6210\uff0c\u8b93\u7d50\u679c\u6bd4\u8f03\u4e0d\u5bb9\u6613\u904e\u5ea6\u64ec\u5408\uff0c\u4e26\u4f7f\u5f97\u9810\u6e2c\u80fd\u529b\u66f4\u63d0\u5347\u3002 \u96a8\u6a5f\u68ee\u6797\u7684\u751f\u6210\u65b9\u6cd5 \u9996\u5148\u5f9e\u8a13\u7df4\u96c6\u4e2d\u62bd\u53d6 n\u2019 \u7b46\u8cc7\u6599\u51fa\u4f86\uff0c\u7136\u800c\u9019 n\u2019 \u7b46\u8cc7\u6599\u662f\u53ef\u4ee5\u88ab\u91cd\u8907\u62bd\u53d6\u7684\u3002\u5047\u8a2d\u6211\u5011\u6709\u4e00\u5343\u7b46\u8cc7\u6599\u6211\u5011\u8981\u5f9e\u4e2d\u62bd\u53d6 100 \u7b46\u8cc7\u6599\u51fa\u4f86\uff0c\u9019 100 \u7b46\u8cc7\u6599\u88e1\u9762\u53ef\u80fd\u6703\u6709\u91cd\u8907\u7684\u6578\u64da\u3002\u63a5\u8457\u7b2c\u4e8c\u6b65\u5f9e\u9019\u4e9b\u62bd\u53d6\u51fa\u4f86\u7684\u8cc7\u6599\u4e2d\u6311\u9078 k \u500b\u7279\u5fb5\u7576\u4f5c\u6c7a\u7b56\u56e0\u5b50\u7684\u5019\u9078\uff0c\u56e0\u6b64\u6bcf\u4e00\u68f5\u6a39\u53ea\u80fd\u770b\u898b\u90e8\u5206\u7684\u7279\u5fb5\u3002\u7b2c\u4e09\u6b65\u91cd\u8907\u4ee5\u4e0a\u6b65\u9a5f m \u6b21\u4e26\u7522\u751f m \u68f5\u6c7a\u7b56\u6a39\u3002\u900f\u904e Bootstrap \u6b65\u9a5f\u91cd\u8907 m \u6b21\uff0c\u505a\u5b8c\u4e4b\u5f8c\u6211\u5011\u6703\u6709 m 
\u7d44\u7684\u8a13\u7df4\u8cc7\u6599\uff0c\u6bcf\u4e00\u7d44\u8a13\u7df4\u8cc7\u6599\u5167\u90fd\u6709 n\u2019 \u7b46\u8cc7\u6599\u3002\u6700\u5f8c\u518d\u900f\u904e\u6bcf\u68f5\u6a39\u7684\u6c7a\u7b56\u4e26\u63a1\u591a\u6578\u6c7a\u6295\u7968\u7684\u65b9\u5f0f\uff0c\u6c7a\u5b9a\u6700\u7d42\u9810\u6e2c\u7684\u985e\u5225\u3002\u56e0\u70ba\u96a8\u6a5f\u68ee\u6797\u6bcf\u4e00\u68f5\u6a39\u7684\u7279\u5fb5\u6578\u91cf\u53ef\u80fd\u90fd\u4e0d\u540c\uff0c\u6240\u4ee5\u6700\u5f8c\u6c7a\u7b56\u51fa\u4f86\u7684\u7d50\u679c\u90fd\u6703\u4e0d\u4e00\u6a23\u3002\u6700\u5f8c\u518d\u6839\u64da\u4efb\u52d9\u7684\u4e0d\u540c\u4f86\u505a\u8ff4\u6b78\u6216\u662f\u5206\u985e\u7684\u554f\u984c\uff0c\u5982\u679c\u662f\u8ff4\u6b78\u554f\u984c\u6211\u5011\u5c31\u5c07\u9019\u4e9b\u6c7a\u7b56\u6a39\u7684\u8f38\u51fa\u505a\u5e73\u5747\u5f97\u5230\u6700\u5f8c\u7b54\u6848\uff0c\u82e5\u662f\u5206\u985e\u554f\u984c\u6211\u5011\u5247\u7528\u6295\u7968\u63a1\u591a\u6578\u6c7a\u7684\u65b9\u5f0f\u4f86\u6574\u5408\u6240\u6709\u6a39\u9810\u6e2c\u7684\u7d50\u679c\u3002 \u5f9e\u8a13\u7df4\u96c6\u4e2d\u62bd\u53d6 n\u2019 \u7b46\u8cc7\u6599\u51fa\u4f86 n\u2019 \u7b46\u8cc7\u6599\u96a8\u6a5f\u6311\u9078 k \u500b\u7279\u5fb5\u505a\u6a23\u672c \u91cd\u8907 m \u6b21\uff0c\u7522\u751f m \u68f5\u6c7a\u7b56\u6a39 \u5206\u985e: \u591a\u6578\u6295\u7968\u6a5f\u5236\u9032\u884c\u9810\u6e2c\u3001\u8ff4\u6b78: \u5e73\u5747\u6a5f\u5236\u9032\u884c\u9810\u6e2c \u96a8\u6a5f\u68ee\u6797\u4e2d\u7684\u96a8\u6a5f\uff1f \u96a8\u6a5f\u68ee\u6797\u4e2d\u7684\u96a8\u6a5f\u6709\u5169\u7a2e\u65b9\u9762\u53ef\u4ee5\u89e3\u91cb\u3002\u9996\u5148\u7b2c\u4e00\u500b\u662f\u96a8\u6a5f\u53d6\u6a23\uff0c\u5728\u6a21\u578b\u8a13\u7df4\u7684\u904e\u7a0b\u4e2d\u6bcf\u68f5\u6a39\u7684\u751f\u6210\u90fd\u6703\u5148\u5f9e\u8a13\u7df4\u96c6\u4e2d\u96a8\u6a5f\u62bd\u53d6 n\u2019 \u7b46\u8cc7\u6599\u51fa\u4f86\uff0c\u800c\u9019 n\u2019 
\u7b46\u8cc7\u6599\u662f\u53ef\u4ee5\u88ab\u91cd\u8907\u62bd\u53d6\u7684\u3002\u6b64\u62bd\u53d6\u8cc7\u6599\u7684\u65b9\u5f0f\u53c8\u7a31\u70ba Bootstrap\uff0c\u5b83\u662f\u4e00\u7a2e\u5728\u7d71\u8a08\u5b78\u4e0a\u5e38\u7528\u7684\u8cc7\u6599\u4f30\u8a08\u65b9\u6cd5\u3002\u7b2c\u4e8c\u500b\u89e3\u91cb\u96a8\u6a5f\u7684\u7406\u7531\u662f\u5728\u96a8\u6a5f\u68ee\u6797\u4e2d\u6bcf\u4e00\u68f5\u6a39\u90fd\u662f\u96a8\u6a5f\u7684\u7279\u5fb5\u9078\u53d6\u3002\u6bcf\u4e00\u68f5\u6a39\u90fd\u662f\u5f9e n\u2019 \u7b46\u8cc7\u6599\u4e2d\u96a8\u6a5f\u6311\u9078 k \u500b\u7279\u5fb5\u505a\u6a23\u672c\u3002 \u5728 sklearn \u4e2d\uff0c\u6700\u591a\u96a8\u6a5f\u9078\u53d6 \ud835\udc59\ud835\udc5c\ud835\udc54 2 \ud835\udc41 \u500b\u7279\u5fb5 \u96a8\u6a5f\u68ee\u6797\u7684\u512a\u9ede \u6bcf\u68f5\u6a39\u6703\u7528\u5230\u54ea\u4e9b\u8a13\u7df4\u8cc7\u6599\u53ca\u7279\u5fb5\u90fd\u662f\u7531\u96a8\u6a5f\u6c7a\u5b9a \u63a1\u7528\u591a\u500b\u6c7a\u7b56\u6a39\u7684\u6295\u7968\u6a5f\u5236\u4f86\u6539\u5584\u6c7a\u7b56\u6a39 \u8207\u6c7a\u7b56\u6a39\u76f8\u6bd4\uff0c\u4e0d\u5bb9\u6613\u904e\u5ea6\u64ec\u5408 \u96a8\u6a5f\u68ee\u6797\u6bcf\u4e00\u68f5\u6a39\u90fd\u662f\u7368\u7acb\u7684 \u8a13\u7df4\u6216\u662f\u9810\u6e2c\u7684\u968e\u6bb5\u6bcf\u4e00\u68f5\u6a39\u90fd\u80fd\u5e73\u884c\u5316\u7684\u904b\u884c [\u7a0b\u5f0f\u5be6\u4f5c] \u96a8\u6a5f\u68ee\u6797(\u5206\u985e\u5668) Parameters: - n_estimators: \u68ee\u6797\u4e2d\u6a39\u6728\u7684\u6578\u91cf\uff0c\u9810\u8a2d=100\u3002 - max_features: \u5283\u5206\u6642\u8003\u616e\u7684\u6700\u5927\u7279\u5fb5\u6578\uff0c\u9810\u8a2dauto\u3002 - criterion: \u4e82\u5ea6\u7684\u8a55\u4f30\u6a19\u6e96\uff0cgini/entropy\u3002\u9810\u8a2d\u70bagini\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\u3002 - splitter: \u7279\u5fb5\u5283\u5206\u9ede\u9078\u64c7\u6a19\u6e96\uff0cbest/random\u3002\u9810\u8a2d\u70babest\u3002 - random_state: 
\u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\uff0csplitter=random \u624d\u6709\u7528\u3002 - min_samples_split: \u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u518d\u5206 - min_samples_leaf: \u5206\u5b8c\u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u5206 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 - get_depth: \u53d6\u5f97\u6a39\u7684\u6df1\u5ea6\u3002 from sklearn.ensemble import RandomForestClassifier # \u5efa\u7acb Random Forest Classifier \u6a21\u578b randomForestModel = RandomForestClassifier ( n_estimators = 100 , criterion = 'gini' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b randomForestModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = randomForestModel . predict ( X_train ) \u4f7f\u7528Score\u8a55\u4f30\u6a21\u578b \u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb score() \u76f4\u63a5\u8a08\u7b97\u6a21\u578b\u9810\u6e2c\u7684\u6e96\u78ba\u7387\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( '\u8a13\u7df4\u96c6: ' , randomForestModel . score ( X_train , y_train )) print ( '\u6e2c\u8a66\u96c6: ' , randomForestModel . 
score ( X_test , y_test )) \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6: 1.0 \u6e2c\u8a66\u96c6: 0.8888888888888888 \u6211\u5011\u53ef\u4ee5\u67e5\u770b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5728\u6e2c\u8a66\u96c6\u4e0a\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u4e0b\u5716\u4e2d\u5de6\u908a\u7684\u662f\u6e2c\u8a66\u96c6\u7684\u771f\u5be6\u5206\u985e\uff0c\u53f3\u908a\u7684\u662f\u6a21\u578b\u9810\u6e2c\u7684\u5206\u985e\u7d50\u679c\u3002\u7531\u65bc\u8a13\u7df4\u8cc7\u6599\u7b46\u6578\u4e0d\u591a\uff0c\u56e0\u6b64\u6a21\u578b\u8a13\u7df4\u5bb9\u6613\u904e\u5ea6\u64ec\u5408\u8a13\u7df4\u96c6\u7684\u5206\u5e03\u3002\u6700\u7d42\u5728\u6e2c\u8a66\u53ca\u9810\u6e2c\u7684\u8868\u73fe\u4e0a\u50c5\u6709 0.88 \u7684\u6e96\u78ba\u7387\u3002 \u7279\u5fb5\u91cd\u8981\u7a0b\u5ea6 \u53ea\u8981\u662f\u6c7a\u7b56\u6a39\u7cfb\u5217\u6f14\u7b97\u6cd5\uff0c\u4e0d\u7ba1\u662f\u5206\u985e\u5668\u6216\u662f\u8ff4\u6b78\u5668\u90fd\u80fd\u900f\u904e feature_importances_ \u4f86\u6aa2\u8996\u6a21\u578b\u9810\u6e2c\u5c0d\u65bc\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 print ( '\u7279\u5fb5\u91cd\u8981\u7a0b\u5ea6: ' , randomForestModel . 
feature_importances_ ) \u8f38\u51fa\u7d50\u679c\uff1a \u7279\u5fb5\u91cd\u8981\u7a0b\u5ea6: [0.09864249 0.01363871 0.44211602 0.44560278] \u96a8\u6a5f\u68ee\u6797(\u8ff4\u6b78\u5668) Parameters: - n_estimators: \u68ee\u6797\u4e2d\u6a39\u6728\u7684\u6578\u91cf\uff0c\u9810\u8a2d=100\u3002 - max_features: \u5283\u5206\u6642\u8003\u616e\u7684\u6700\u5927\u7279\u5fb5\u6578\uff0c\u9810\u8a2dauto\u3002 - criterion: \u8a55\u4f30\u5207\u5272\u9ede\u6307\u6a19\uff0cmse/mae\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\u3002 - splitter: \u7279\u5fb5\u5283\u5206\u9ede\u9078\u64c7\u6a19\u6e96\uff0cbest/random\u3002\u9810\u8a2d\u70babest\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\uff0csplitter=random \u624d\u6709\u7528\u3002 - min_samples_split: \u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u518d\u5206 - min_samples_leaf: \u5206\u5b8c\u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u5206 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - get_depth: \u53d6\u5f97\u6a39\u7684\u6df1\u5ea6\u3002 from sklearn.ensemble import RandomForestRegressor # \u5efa\u7acbRandomForestRegressor\u6a21\u578b randomForestModel = RandomForestRegressor ( n_estimators = 100 , criterion = 'mse' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b randomForestModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = randomForestModel . 
predict ( x ) \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 14] \u591a\u68f5\u6c7a\u7b56\u6a39\u66f4\u53b2\u5bb3\uff1a\u96a8\u6a5f\u68ee\u6797 (Random forest)"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#day-14-random-forest","text":"","title":"[Day 14] \u591a\u68f5\u6c7a\u7b56\u6a39\u66f4\u53b2\u5bb3\uff1a\u96a8\u6a5f\u68ee\u6797 (Random forest)"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#_1","text":"\u96a8\u6a5f\u68ee\u6797\u4ecb\u7d39 \u96a8\u6a5f\u68ee\u6797\u7684\u6a39\u662f\u5982\u4f55\u751f\u6210\uff1f\u96a8\u6a5f\u68ee\u6797\u7684\u512a\u9ede\uff1f \u96a8\u6a5f\u68ee\u6797\u5982\u4f55\u8655\u7406\u5206\u985e\u554f\u984c\uff1f \u96a8\u6a5f\u68ee\u6797\u5982\u4f55\u8655\u7406\u8ff4\u6b78\u554f\u984c\uff1f \u5be6\u4f5c\u96a8\u6a5f\u68ee\u6797\u5206\u985e\u5668 \u6bd4\u8f03\u96a8\u6a5f\u68ee\u6797\u8207\u6c7a\u7b56\u6a39\u5169\u8005\u5dee\u5225\u3002 \u7bc4\u4f8b\u7a0b\u5f0f \u96a8\u6a5f\u68ee\u6797(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f \u96a8\u6a5f\u68ee\u6797(Regression)\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#_2","text":"\u96a8\u6a5f\u68ee\u6797\u5176\u5be6\u5c31\u662f\u9032\u968e\u7248\u7684\u6c7a\u7b56\u6a39\uff0c\u6240\u8b02\u7684\u68ee\u6797\u5c31\u662f\u7531\u5f88\u591a\u68f5\u6c7a\u7b56\u6a39\u6240\u7d44\u6210\u3002\u96a8\u6a5f\u68ee\u6797\u662f\u4f7f\u7528 Bagging 
\u52a0\u4e0a\u96a8\u6a5f\u7279\u5fb5\u63a1\u6a23\u7684\u65b9\u6cd5\u6240\u7522\u751f\u51fa\u4f86\u7684\u6574\u9ad4\u5b78\u7fd2\u6f14\u7b97\u6cd5\u3002\u9084\u8a18\u5f97\u5728\u524d\u5e7e\u5929\u7684\u6c7a\u7b56\u6a39\u6f14\u7b97\u6cd5\u4e2d\uff0c\u7576\u6a21\u578b\u7684\u6a39\u6700\u5927\u6df1\u5ea6\u8a2d\u5b9a\u592a\u5927\u7684\u8a71\u5bb9\u6613\u8b93\u6a21\u578b\u904e\u64ec\u5408\u3002\u56e0\u6b64\u96a8\u6a5f\u68ee\u6797\u85c9\u7531\u591a\u68f5\u4e0d\u540c\u6a39\u7684\u6982\u5ff5\u6240\u7d44\u6210\uff0c\u8b93\u7d50\u679c\u6bd4\u8f03\u4e0d\u5bb9\u6613\u904e\u5ea6\u64ec\u5408\uff0c\u4e26\u4f7f\u5f97\u9810\u6e2c\u80fd\u529b\u66f4\u63d0\u5347\u3002","title":"\u96a8\u6a5f\u68ee\u6797"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#_3","text":"\u9996\u5148\u5f9e\u8a13\u7df4\u96c6\u4e2d\u62bd\u53d6 n\u2019 \u7b46\u8cc7\u6599\u51fa\u4f86\uff0c\u7136\u800c\u9019 n\u2019 \u7b46\u8cc7\u6599\u662f\u53ef\u4ee5\u88ab\u91cd\u8907\u62bd\u53d6\u7684\u3002\u5047\u8a2d\u6211\u5011\u6709\u4e00\u5343\u7b46\u8cc7\u6599\u6211\u5011\u8981\u5f9e\u4e2d\u62bd\u53d6 100 \u7b46\u8cc7\u6599\u51fa\u4f86\uff0c\u9019 100 \u7b46\u8cc7\u6599\u88e1\u9762\u53ef\u80fd\u6703\u6709\u91cd\u8907\u7684\u6578\u64da\u3002\u63a5\u8457\u7b2c\u4e8c\u6b65\u5f9e\u9019\u4e9b\u62bd\u53d6\u51fa\u4f86\u7684\u8cc7\u6599\u4e2d\u6311\u9078 k \u500b\u7279\u5fb5\u7576\u4f5c\u6c7a\u7b56\u56e0\u5b50\u7684\u5019\u9078\uff0c\u56e0\u6b64\u6bcf\u4e00\u68f5\u6a39\u53ea\u80fd\u770b\u898b\u90e8\u5206\u7684\u7279\u5fb5\u3002\u7b2c\u4e09\u6b65\u91cd\u8907\u4ee5\u4e0a\u6b65\u9a5f m \u6b21\u4e26\u7522\u751f m \u68f5\u6c7a\u7b56\u6a39\u3002\u900f\u904e Bootstrap \u6b65\u9a5f\u91cd\u8907 m \u6b21\uff0c\u505a\u5b8c\u4e4b\u5f8c\u6211\u5011\u6703\u6709 m \u7d44\u7684\u8a13\u7df4\u8cc7\u6599\uff0c\u6bcf\u4e00\u7d44\u8a13\u7df4\u8cc7\u6599\u5167\u90fd\u6709 n\u2019 
\u7b46\u8cc7\u6599\u3002\u6700\u5f8c\u518d\u900f\u904e\u6bcf\u68f5\u6a39\u7684\u6c7a\u7b56\u4e26\u63a1\u591a\u6578\u6c7a\u6295\u7968\u7684\u65b9\u5f0f\uff0c\u6c7a\u5b9a\u6700\u7d42\u9810\u6e2c\u7684\u985e\u5225\u3002\u56e0\u70ba\u96a8\u6a5f\u68ee\u6797\u6bcf\u4e00\u68f5\u6a39\u7684\u7279\u5fb5\u6578\u91cf\u53ef\u80fd\u90fd\u4e0d\u540c\uff0c\u6240\u4ee5\u6700\u5f8c\u6c7a\u7b56\u51fa\u4f86\u7684\u7d50\u679c\u90fd\u6703\u4e0d\u4e00\u6a23\u3002\u6700\u5f8c\u518d\u6839\u64da\u4efb\u52d9\u7684\u4e0d\u540c\u4f86\u505a\u8ff4\u6b78\u6216\u662f\u5206\u985e\u7684\u554f\u984c\uff0c\u5982\u679c\u662f\u8ff4\u6b78\u554f\u984c\u6211\u5011\u5c31\u5c07\u9019\u4e9b\u6c7a\u7b56\u6a39\u7684\u8f38\u51fa\u505a\u5e73\u5747\u5f97\u5230\u6700\u5f8c\u7b54\u6848\uff0c\u82e5\u662f\u5206\u985e\u554f\u984c\u6211\u5011\u5247\u7528\u6295\u7968\u63a1\u591a\u6578\u6c7a\u7684\u65b9\u5f0f\u4f86\u6574\u5408\u6240\u6709\u6a39\u9810\u6e2c\u7684\u7d50\u679c\u3002 \u5f9e\u8a13\u7df4\u96c6\u4e2d\u62bd\u53d6 n\u2019 \u7b46\u8cc7\u6599\u51fa\u4f86 n\u2019 \u7b46\u8cc7\u6599\u96a8\u6a5f\u6311\u9078 k \u500b\u7279\u5fb5\u505a\u6a23\u672c \u91cd\u8907 m \u6b21\uff0c\u7522\u751f m \u68f5\u6c7a\u7b56\u6a39 \u5206\u985e: \u591a\u6578\u6295\u7968\u6a5f\u5236\u9032\u884c\u9810\u6e2c\u3001\u8ff4\u6b78: \u5e73\u5747\u6a5f\u5236\u9032\u884c\u9810\u6e2c","title":"\u96a8\u6a5f\u68ee\u6797\u7684\u751f\u6210\u65b9\u6cd5"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#_4","text":"\u96a8\u6a5f\u68ee\u6797\u4e2d\u7684\u96a8\u6a5f\u6709\u5169\u7a2e\u65b9\u9762\u53ef\u4ee5\u89e3\u91cb\u3002\u9996\u5148\u7b2c\u4e00\u500b\u662f\u96a8\u6a5f\u53d6\u6a23\uff0c\u5728\u6a21\u578b\u8a13\u7df4\u7684\u904e\u7a0b\u4e2d\u6bcf\u68f5\u6a39\u7684\u751f\u6210\u90fd\u6703\u5148\u5f9e\u8a13\u7df4\u96c6\u4e2d\u96a8\u6a5f\u62bd\u53d6 n\u2019 \u7b46\u8cc7\u6599\u51fa\u4f86\uff0c\u800c\u9019 n\u2019 \u7b46\u8cc7\u6599\u662f\u53ef\u4ee5\u88ab\u91cd\u8907\u62bd\u53d6\u7684\u3002\u6b64\u62bd\u53d6\u8cc7\u6599\u7684\u65b9\u5f0f\u53c8\u7a31\u70ba 
Bootstrap\uff0c\u5b83\u662f\u4e00\u7a2e\u5728\u7d71\u8a08\u5b78\u4e0a\u5e38\u7528\u7684\u8cc7\u6599\u4f30\u8a08\u65b9\u6cd5\u3002\u7b2c\u4e8c\u500b\u89e3\u91cb\u96a8\u6a5f\u7684\u7406\u7531\u662f\u5728\u96a8\u6a5f\u68ee\u6797\u4e2d\u6bcf\u4e00\u68f5\u6a39\u90fd\u662f\u96a8\u6a5f\u7684\u7279\u5fb5\u9078\u53d6\u3002\u6bcf\u4e00\u68f5\u6a39\u90fd\u662f\u5f9e n\u2019 \u7b46\u8cc7\u6599\u4e2d\u96a8\u6a5f\u6311\u9078 k \u500b\u7279\u5fb5\u505a\u6a23\u672c\u3002 \u5728 sklearn \u4e2d\uff0c\u6700\u591a\u96a8\u6a5f\u9078\u53d6 \ud835\udc59\ud835\udc5c\ud835\udc54 2 \ud835\udc41 \u500b\u7279\u5fb5","title":"\u96a8\u6a5f\u68ee\u6797\u4e2d\u7684\u96a8\u6a5f\uff1f"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#_5","text":"\u6bcf\u68f5\u6a39\u6703\u7528\u5230\u54ea\u4e9b\u8a13\u7df4\u8cc7\u6599\u53ca\u7279\u5fb5\u90fd\u662f\u7531\u96a8\u6a5f\u6c7a\u5b9a \u63a1\u7528\u591a\u500b\u6c7a\u7b56\u6a39\u7684\u6295\u7968\u6a5f\u5236\u4f86\u6539\u5584\u6c7a\u7b56\u6a39 \u8207\u6c7a\u7b56\u6a39\u76f8\u6bd4\uff0c\u4e0d\u5bb9\u6613\u904e\u5ea6\u64ec\u5408 \u96a8\u6a5f\u68ee\u6797\u6bcf\u4e00\u68f5\u6a39\u90fd\u662f\u7368\u7acb\u7684 \u8a13\u7df4\u6216\u662f\u9810\u6e2c\u7684\u968e\u6bb5\u6bcf\u4e00\u68f5\u6a39\u90fd\u80fd\u5e73\u884c\u5316\u7684\u904b\u884c","title":"\u96a8\u6a5f\u68ee\u6797\u7684\u512a\u9ede"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#_6","text":"","title":"[\u7a0b\u5f0f\u5be6\u4f5c]"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#_7","text":"Parameters: - n_estimators: \u68ee\u6797\u4e2d\u6a39\u6728\u7684\u6578\u91cf\uff0c\u9810\u8a2d=100\u3002 - max_features: \u5283\u5206\u6642\u8003\u616e\u7684\u6700\u5927\u7279\u5fb5\u6578\uff0c\u9810\u8a2dauto\u3002 - criterion: \u4e82\u5ea6\u7684\u8a55\u4f30\u6a19\u6e96\uff0cgini/entropy\u3002\u9810\u8a2d\u70bagini\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\u3002 - splitter: \u7279\u5fb5\u5283\u5206\u9ede\u9078\u64c7\u6a19\u6e96\uff0cbest/random\u3002\u9810\u8a2d\u70babest\u3002 - random_state: 
\u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\uff0csplitter=random \u624d\u6709\u7528\u3002 - min_samples_split: \u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u518d\u5206 - min_samples_leaf: \u5206\u5b8c\u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u5206 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 - get_depth: \u53d6\u5f97\u6a39\u7684\u6df1\u5ea6\u3002 from sklearn.ensemble import RandomForestClassifier # \u5efa\u7acb Random Forest Classifier \u6a21\u578b randomForestModel = RandomForestClassifier ( n_estimators = 100 , criterion = 'gini' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b randomForestModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = randomForestModel . predict ( X_train )","title":"\u96a8\u6a5f\u68ee\u6797(\u5206\u985e\u5668)"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#score","text":"\u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb score() \u76f4\u63a5\u8a08\u7b97\u6a21\u578b\u9810\u6e2c\u7684\u6e96\u78ba\u7387\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( '\u8a13\u7df4\u96c6: ' , randomForestModel . score ( X_train , y_train )) print ( '\u6e2c\u8a66\u96c6: ' , randomForestModel . 
score ( X_test , y_test )) \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6: 1.0 \u6e2c\u8a66\u96c6: 0.8888888888888888 \u6211\u5011\u53ef\u4ee5\u67e5\u770b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5728\u6e2c\u8a66\u96c6\u4e0a\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u4e0b\u5716\u4e2d\u5de6\u908a\u7684\u662f\u6e2c\u8a66\u96c6\u7684\u771f\u5be6\u5206\u985e\uff0c\u53f3\u908a\u7684\u662f\u6a21\u578b\u9810\u6e2c\u7684\u5206\u985e\u7d50\u679c\u3002\u7531\u65bc\u8a13\u7df4\u8cc7\u6599\u7b46\u6578\u4e0d\u591a\uff0c\u56e0\u6b64\u6a21\u578b\u8a13\u7df4\u5bb9\u6613\u904e\u5ea6\u64ec\u5408\u8a13\u7df4\u96c6\u7684\u5206\u5e03\u3002\u6700\u7d42\u5728\u6e2c\u8a66\u53ca\u9810\u6e2c\u7684\u8868\u73fe\u4e0a\u50c5\u6709 0.88 \u7684\u6e96\u78ba\u7387\u3002","title":"\u4f7f\u7528Score\u8a55\u4f30\u6a21\u578b"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#_8","text":"\u53ea\u8981\u662f\u6c7a\u7b56\u6a39\u7cfb\u5217\u6f14\u7b97\u6cd5\uff0c\u4e0d\u7ba1\u662f\u5206\u985e\u5668\u6216\u662f\u8ff4\u6b78\u5668\u90fd\u80fd\u900f\u904e feature_importances_ \u4f86\u6aa2\u8996\u6a21\u578b\u9810\u6e2c\u5c0d\u65bc\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 print ( '\u7279\u5fb5\u91cd\u8981\u7a0b\u5ea6: ' , randomForestModel . 
feature_importances_ ) \u8f38\u51fa\u7d50\u679c\uff1a \u7279\u5fb5\u91cd\u8981\u7a0b\u5ea6: [0.09864249 0.01363871 0.44211602 0.44560278]","title":"\u7279\u5fb5\u91cd\u8981\u7a0b\u5ea6"},{"location":"14.\u96a8\u6a5f\u68ee\u6797/#_9","text":"Parameters: - n_estimators: \u68ee\u6797\u4e2d\u6a39\u6728\u7684\u6578\u91cf\uff0c\u9810\u8a2d=100\u3002 - max_features: \u5283\u5206\u6642\u8003\u616e\u7684\u6700\u5927\u7279\u5fb5\u6578\uff0c\u9810\u8a2dauto\u3002 - criterion: \u8a55\u4f30\u5207\u5272\u9ede\u6307\u6a19\uff0cmse/mae\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\u3002 - splitter: \u7279\u5fb5\u5283\u5206\u9ede\u9078\u64c7\u6a19\u6e96\uff0cbest/random\u3002\u9810\u8a2d\u70babest\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\uff0csplitter=random \u624d\u6709\u7528\u3002 - min_samples_split: \u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u518d\u5206 - min_samples_leaf: \u5206\u5b8c\u81f3\u5c11\u6709\u591a\u5c11\u8cc7\u6599\u624d\u80fd\u5206 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - get_depth: \u53d6\u5f97\u6a39\u7684\u6df1\u5ea6\u3002 from sklearn.ensemble import RandomForestRegressor # \u5efa\u7acbRandomForestRegressor\u6a21\u578b randomForestModel = RandomForestRegressor ( n_estimators = 100 , criterion = 'mse' ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b randomForestModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = randomForestModel . 
predict ( x ) \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u96a8\u6a5f\u68ee\u6797(\u8ff4\u6b78\u5668)"},{"location":"15.XGBoost/","text":"[Day 15] \u6a5f\u5668\u5b78\u7fd2\u5e38\u52dd\u8ecd - XGBoost \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 XGBoost \u4ecb\u7d39 XGBoost \u662f\u4ec0\u9ebc\uff1f\u70ba\u4ec0\u9ebc\u5b83\u90a3\u9ebc\u5f37\u5927\uff1f XGBoost \u512a\u9ede \u6bd4\u8f03\u5169\u7a2e\u6574\u9ad4\u5b78\u7fd2\u67b6\u69cb\u5dee\u7570\uff1f Bagging vs. Boosting Boosting vs. Decision Tree Boosting \u65b9\u6cd5\u6709\u54ea\u4e9b \u5be6\u4f5c XGBoost \u5206\u985e\u5668\u8207\u8ff4\u6b78\u5668 \u6bd4\u8f03 Bagging \u8207 Boosting \u5169\u8005\u5dee\u5225 \u7bc4\u4f8b\u7a0b\u5f0f XGBoost(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f XGBoost(Regression)\uff1a \u4eba\u4eba\u9a5a\u5947\u7684 XGBoost XGboost \u5168\u540d\u70ba eXtreme Gradient Boosting\uff0c\u662f\u76ee\u524d Kaggle \u7af6\u8cfd\u4e2d\u6700\u5e38\u898b\u5230\u7684\u7b97\u6cd5\uff0c\u540c\u6642\u4e5f\u662f\u591a\u6578\u5f97\u734e\u8005\u6240\u4f7f\u7528\u7684\u6a21\u578b\u3002\u6b64\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u662f\u7531\u83ef\u76db\u9813\u5927\u5b78\u535a\u58eb\u751f\u9673\u5929\u5947\u6240\u63d0\u51fa\u4f86\u7684\uff0c\u5b83\u662f\u4ee5 Gradient Boosting \u70ba\u57fa\u790e\u4e0b\u53bb\u5be6\u4f5c\uff0c\u4e26\u6dfb\u52a0\u4e00\u4e9b\u65b0\u7684\u6280\u5de7\u3002\u5b83\u53ef\u4ee5\u8aaa\u662f\u7d50\u5408 Bagging \u548c Boosting \u7684\u512a\u9ede\u3002XGboost \u4fdd\u6709 Gradient Boosting \u7684\u505a\u6cd5\uff0c\u6bcf\u4e00\u68f5\u6a39\u662f\u4e92\u76f8\u95dc\u806f\u7684\uff0c\u76ee\u6a19\u662f\u5e0c\u671b\u5f8c\u9762\u751f\u6210\u7684\u6a39\u80fd\u5920\u4fee\u6b63\u524d\u9762\u4e00\u68f5\u6a39\u72af\u932f\u7684\u5730\u65b9\u3002\u6b64\u5916 XGboost 
\u662f\u63a1\u7528\u7279\u5fb5\u96a8\u6a5f\u63a1\u6a23\u7684\u6280\u5de7\uff0c\u548c\u96a8\u6a5f\u68ee\u6797\u4e00\u6a23\u5728\u751f\u6210\u6bcf\u4e00\u68f5\u6a39\u7684\u6642\u5019\u96a8\u6a5f\u62bd\u53d6\u7279\u5fb5\uff0c\u56e0\u6b64\u5728\u6bcf\u68f5\u6a39\u7684\u751f\u6210\u4e2d\u4e26\u4e0d\u6703\u6bcf\u4e00\u6b21\u90fd\u62ff\u5168\u90e8\u7684\u7279\u5fb5\u53c3\u8207\u6c7a\u7b56\u3002\u6b64\u5916\u70ba\u4e86\u907f\u514d\u6a21\u578b\u904e\u65bc\u8907\u96dc\uff0cXGboost \u5728\u76ee\u6a19\u51fd\u6578\u6dfb\u52a0\u4e86\u6b63\u5247\u5316\u3002\u56e0\u70ba\u6a21\u578b\u5728\u8a13\u7df4\u6642\u70ba\u4e86\u64ec\u5408\u8a13\u7df4\u8cc7\u6599\uff0c\u6703\u7522\u751f\u5f88\u591a\u9ad8\u6b21\u9805\u7684\u51fd\u6578\uff0c\u4f46\u53cd\u800c\u5bb9\u6613\u88ab\u96dc\u8a0a\u5e72\u64fe\u5c0e\u81f4\u904e\u5ea6\u64ec\u5408\u3002\u56e0\u6b64 L1/L2 Regularization \u76ee\u7684\u662f\u8b93\u640d\u5931\u51fd\u6578\u66f4\u52a0\u5e73\u6ed1\uff0c\u4e14\u6297\u96dc\u8a0a\u5e72\u64fe\u80fd\u529b\u66f4\u5927\u3002\u6700\u5f8c XGboost \u9084\u7528\u5230\u4e86\u4e00\u968e\u5c0e\u6578\u548c\u4e8c\u968e\u5c0e\u6578\u4f86\u751f\u6210\u4e0b\u4e00\u68f5\u6a39\u3002\u5176\u4e2d Gradient \u5c31\u662f\u6240\u8b02\u7684\u4e00\u968e\u5c0e\u6578\uff0c\u800c Hessian \u5373\u70ba\u4e8c\u968e\u5c0e\u6578\u3002 XGBoost \u512a\u9ede XGBoost \u9664\u4e86\u53ef\u4ee5\u505a\u5206\u985e\u4e5f\u80fd\u9032\u884c\u8ff4\u6b78\u9023\u7e8c\u6027\u6578\u503c\u7684\u9810\u6e2c\uff0c\u800c\u4e14\u6548\u679c\u901a\u5e38\u90fd\u4e0d\u5dee\u3002\u4e26\u900f\u904e Boosting \u6280\u5de7\u5c07\u8a31\u591a\u5f31\u6c7a\u7b56\u6a39\u96c6\u6210\u5728\u4e00\u8d77\u5f62\u6210\u4e00\u500b\u5f37\u7684\u9810\u6e2c\u6a21\u578b\u3002 \u5229\u7528\u4e86\u4e8c\u968e\u68af\u5ea6\u4f86\u5c0d\u7bc0\u9ede\u9032\u884c\u5283\u5206 \u5229\u7528\u5c40\u90e8\u8fd1\u4f3c\u7b97\u6cd5\u5c0d\u5206\u88c2\u7bc0\u9ede\u9032\u884c\u512a\u5316 \u5728\u640d\u5931\u51fd\u6578\u4e2d\u52a0\u5165\u4e86 L1/L2 \u9805\uff0c\u63a7\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6 
\u63d0\u4f9b GPU \u5e73\u884c\u5316\u904b\u7b97 Bagging vs. Boosting \u5728\u9019\u88e1\u5e6b\u5927\u5bb6\u56de\u9867\u4e00\u4e0b\u6574\u9ad4\u5b78\u7fd2\u4e2d\u7684 Bagging \u8207 Boosting \u5169\u8005\u9593\u7684\u5dee\u7570\u3002\u9996\u5148 Bagging \u900f\u904e\u96a8\u6a5f\u62bd\u6a23\u7684\u65b9\u5f0f\u751f\u6210\u6bcf\u4e00\u68f5\u6a39\uff0c\u6700\u91cd\u8981\u7684\u662f\u6bcf\u68f5\u6a39\u5f7c\u6b64\u7368\u7acb\u4e26\u7121\u95dc\u806f\u3002\u5148\u524d\u6240\u63d0\u5230\u7684\u96a8\u6a5f\u68ee\u6797\u5c31\u662f Bagging \u7684\u5be6\u4f8b\u3002\u53e6\u5916 Boosting \u5247\u662f\u900f\u904e\u5e8f\u5217\u7684\u65b9\u5f0f\u751f\u6210\u6a39\uff0c\u5f8c\u9762\u6240\u751f\u6210\u7684\u6a39\u6703\u8207\u524d\u4e00\u68f5\u6a39\u76f8\u95dc\u3002\u672c\u7ae0\u6240\u63d0\u53ca\u7684 XGBoost \u5c31\u662f Boosting \u65b9\u6cd5\u7684\u5176\u4e2d\u4e00\u7a2e\u5be6\u4f8b\u3002\u6b63\u662f\u6bcf\u68f5\u6a39\u7684\u751f\u6210\u90fd\u6539\u5584\u4e86\u4e0a\u4e00\u68f5\u6a39\u5b78\u7fd2\u4e0d\u597d\u7684\u5730\u65b9\uff0c\u56e0\u6b64 Boosting \u7684\u6a21\u578b\u901a\u5e38\u6703\u6bd4 Bagging \u9084\u4f86\u7684\u7cbe\u6e96\u3002 Bagging \u900f\u904e\u62bd\u6a23\u7684\u65b9\u5f0f\u751f\u6210\u6a39\uff0c\u6bcf\u68f5\u6a39\u5f7c\u6b64\u7368\u7acb Boosting \u900f\u904e\u5e8f\u5217\u7684\u65b9\u5f0f\u751f\u6210\u6a39\uff0c\u5f8c\u9762\u751f\u6210\u7684\u6a39\u6703\u8207\u524d\u4e00\u68f5\u6a39\u76f8\u95dc Boosting vs. 
Decision Tree \u6211\u5011\u518d\u8207\u6700\u4e00\u958b\u59cb\u6240\u63d0\u7684\u6c7a\u7b56\u6a39\u505a\u6bd4\u8f03\u3002\u6c7a\u7b56\u6a39\u901a\u5e38\u70ba\u4e00\u68f5\u8907\u96dc\u7684\u6a39\uff0c\u800c\u5728 Boosting \u662f\u7522\u751f\u975e\u5e38\u591a\u68f5\u7684\u6a39\uff0c\u4f46\u662f\u6bcf\u4e00\u68f5\u6a39\u90fd\u662f\u5f88\u7c21\u55ae\u7684\u6c7a\u7b56\u6a39\u3002Boosting \u5e0c\u671b\u65b0\u7684\u6a39\u53ef\u4ee5\u91dd\u5c0d\u820a\u7684\u6a39\u9810\u6e2c\u4e0d\u592a\u597d\u7684\u90e8\u5206\u505a\u4e00\u4e9b\u88dc\u5f37\u3002\u6700\u7d42\u6211\u5011\u8981\u628a\u6240\u6709\u7c21\u55ae\u7684\u6a39\u5408\u5728\u4e00\u8d77\u624d\u80fd\u7576\u6700\u5f8c\u7684\u9810\u6e2c\u8f38\u51fa\u3002 Boosting \u65b9\u6cd5\u6709\u54ea\u4e9b AdaBoost \u662f\u7531 Yoav Freund \u548c Robert Schapire \u65bc 1995 \u5e74\u63d0\u51fa\u3002\u6240\u8b02\u7684\u81ea\u9069\u61c9\u662f\u8868\u793a\u6839\u64da\u5f31\u5b78\u7fd2\u7684\u5b78\u7fd2\u8aa4\u5dee\u7387\u8868\u73fe\u4f86\u66f4\u65b0\u8a13\u7df4\u6a23\u672c\u7684\u6b0a\u91cd\uff0c\u7136\u5f8c\u57fa\u65bc\u8abf\u6574\u6b0a\u91cd\u5f8c\u7684\u8a13\u7df4\u96c6\u4f86\u8a13\u7df4\u7b2c\u4e8c\u500b\u5f31\u5b78\u7fd2\u5668\uff0c\u85c9\u7531\u6b64\u65b9\u6cd5\u4e0d\u65b7\u7684\u8fed\u4ee3\u4e0b\u53bb\u3002 AdaBoost\uff08Adaptive Boosting) AdaBoostClassifier AdaBoostRegressor Gradient Boosting \u7531 Friedman \u65bc 1999 \u5e74\u63d0\u51fa\u3002\u5176\u4e2d GBDT (Gradient Boosting Decision Tree) \u7684\u5f31\u5b78\u7fd2\u5668\u50c5\u9650\u65bc\u53ea\u80fd\u4f7f\u7528 CART \u6c7a\u7b56\u6a39\u6a21\u578b\uff0c\u4e26\u63a1\u7528\u52a0\u6cd5\u6a21\u578b\u7684\u524d\u5411\u5206\u6b65\u7b97\u6cd5\u4f86\u89e3\u6c7a\u5206\u985e\u548c\u8ff4\u6b78\u554f\u984c\u3002 Gradient Boosting GradientBoostingClassifier GradientBoostingRegressor \u63a5\u4e0b\u4f86\u4ecb\u7d39\u8fd1\u5e74\u4e09\u500b\u5f37\u5927\u7684\u958b\u6e90\u6a5f\u5668\u5b78\u7fd2\u5c08\u6848\u3002\u9996\u5148 XGBoost \u6700\u521d\u662f\u7531\u9673\u5929\u5947\u65bc 
2014 \u5e74 3 \u6708\u767c\u8d77\u7684\u4e00\u500b\u7814\u7a76\u9805\u76ee\uff0c\u4e26\u5728\u77ed\u6642\u9593\u5167\u6210\u70ba\u7af6\u8cfd\u4e2d\u7684\u71b1\u9580\u7684\u6a21\u578b\u3002\u63a5\u8457\u65bc 2017 \u5e74 1 \u6708\u5fae\u8edf\u767c\u5e03\u4e86\u7b2c\u4e00\u500b\u7a69\u5b9a\u7684 LightGBM \u7248\u672c\u3002\u5b83\u662f\u4e00\u500b\u57fa\u65bc Gradient Boosting \u7684\u8f15\u91cf\u7d1a\u7684\u6f14\u7b97\u6cd5\uff0c\u512a\u9ede\u5728\u65bc\u4f7f\u7528\u5c11\u91cf\u8cc7\u6e90\u3001\u66f4\u5feb\u7684\u8a13\u7df4\u6548\u7387\u5f97\u5230\u66f4\u597d\u7684\u6e96\u78ba\u5ea6\u3002\u53e6\u5916\u5728\u540c\u5e74\u7684 4 \u6708\uff0c\u4fc4\u7f85\u65af\u7684\u4e00\u5bb6\u79d1\u6280\u516c\u53f8 Yandex \u767c\u5e03\u4e86 CatBoost \uff0c\u5176\u6838\u5fc3\u4f9d\u7136\u4f7f\u7528\u4e86 Gradient Boosting \u6280\u5de7\uff0c\u4e26\u70ba\u985e\u5225\u578b\u7684\u7279\u5fb5\u505a\u7279\u5225\u7684\u8f49\u63db\u4e26\u7522\u751f\u65b0\u7684\u6578\u503c\u578b\u7279\u5fb5\u3002 \u672a\u4f86\u5e7e\u5929\u5c07\u6703\u4ecb\u7d39 LightGBM \u8207 CatBoost \u54e6\uff01 [\u7a0b\u5f0f\u5be6\u4f5c] XGBoost \u5206\u985e\u5668 Parameters: - n_estimators: \u7e3d\u5171\u8fed\u4ee3\u7684\u6b21\u6578\uff0c\u5373\u6c7a\u7b56\u6a39\u7684\u500b\u6578\u3002\u9810\u8a2d\u503c\u70ba100\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\uff0c\u9ed8\u8a8d\u503c\u70ba6\u3002 - booster: gbtree \u6a39\u6a21\u578b(\u9810\u8a2d) / gblinear \u7dda\u6027\u6a21\u578b - learning_rate: \u5b78\u7fd2\u901f\u7387\uff0c\u9810\u8a2d0.3\u3002 - gamma: \u61f2\u7f70\u9805\u4fc2\u6578\uff0c\u6307\u5b9a\u7bc0\u9ede\u5206\u88c2\u6240\u9700\u7684\u6700\u5c0f\u640d\u5931\u51fd\u6578\u4e0b\u964d\u503c\u3002 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 
- predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 from xgboost import XGBClassifier # \u5efa\u7acb XGBClassifier \u6a21\u578b xgboostModel = XGBClassifier ( n_estimators = 100 , learning_rate = 0.3 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b xgboostModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = xgboostModel . predict ( X_train ) \u4f7f\u7528Score\u8a55\u4f30\u6a21\u578b \u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb score() \u76f4\u63a5\u8a08\u7b97\u6a21\u578b\u9810\u6e2c\u7684\u6e96\u78ba\u7387\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( '\u8a13\u7df4\u96c6: ' , xgboostModel . score ( X_train , y_train )) print ( '\u6e2c\u8a66\u96c6: ' , xgboostModel . score ( X_test , y_test )) \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6: 1.0 \u6e2c\u8a66\u96c6: 0.9333333333333333 \u5927\u5bb6\u53ef\u4ee5\u8a66\u8457\u8207\u524d\u5e7e\u5929\u7684\u6c7a\u7b56\u6a39\u548c\u96a8\u6a5f\u68ee\u6797\u5169\u500b\u6a21\u578b\u76f8\u6bd4\u8f03\u3002\u662f\u4e0d\u662f XGBoost \u6709\u8457\u66f4\u597d\u7684\u9810\u6e2c\u7d50\u679c\u5462\uff1f\u56e0\u70ba\u6709\u4e86 Gradient Boosting \u5b78\u7fd2\u6a5f\u5236\uff0c\u5927\u5e45\u63d0\u5347\u4e86\u9810\u6e2c\u80fd\u529b\u3002\u5728\u5b78\u7fd2\u904e\u7a0b\u4e2d\u5c07\u9810\u6e2c\u4e0d\u597d\u7684\u5730\u65b9\uff0c\u5c24\u5176\u662f\u6a58\u8272 (Versicolour) \u8207\u7da0\u8272 (Virginica) \u4ea4\u754c\u8655\u6709\u66f4\u597d\u7684\u8a55\u4f30\u80fd\u529b\u3002 XGBoost (\u8ff4\u6b78\u5668) Parameters: - n_estimators: \u7e3d\u5171\u8fed\u4ee3\u7684\u6b21\u6578\uff0c\u5373\u6c7a\u7b56\u6a39\u7684\u500b\u6578\u3002\u9810\u8a2d\u503c\u70ba100\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\uff0c\u9ed8\u8a8d\u503c\u70ba6\u3002 - booster: gbtree \u6a39\u6a21\u578b(\u9810\u8a2d) / gblinear \u7dda\u6027\u6a21\u578b - learning_rate: \u5b78\u7fd2\u901f\u7387\uff0c\u9810\u8a2d0.3\u3002 - gamma: 
\u61f2\u7f70\u9805\u4fc2\u6578\uff0c\u6307\u5b9a\u7bc0\u9ede\u5206\u88c2\u6240\u9700\u7684\u6700\u5c0f\u640d\u5931\u51fd\u6578\u4e0b\u964d\u503c\u3002 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 import xgboost as xgb # \u5efa\u7acb XGBRegressor \u6a21\u578b xgbrModel = xgb . XGBRegressor () # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b xgbrModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = xgbrModel . predict ( x ) Reference XGboost\u5165\u9580\u7d93\u9a57\u5206\u4eab-\u8d85\u53c3\u6578\u89e3\u6790 \u95dc\u65bc XGBoost 20 \u500b FAQ \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 15] \u6a5f\u5668\u5b78\u7fd2\u5e38\u52dd\u8ecd - XGBoost"},{"location":"15.XGBoost/#day-15-xgboost","text":"","title":"[Day 15] \u6a5f\u5668\u5b78\u7fd2\u5e38\u52dd\u8ecd - XGBoost"},{"location":"15.XGBoost/#_1","text":"XGBoost \u4ecb\u7d39 XGBoost \u662f\u4ec0\u9ebc\uff1f\u70ba\u4ec0\u9ebc\u5b83\u90a3\u9ebc\u5f37\u5927\uff1f XGBoost \u512a\u9ede \u6bd4\u8f03\u5169\u7a2e\u6574\u9ad4\u5b78\u7fd2\u67b6\u69cb\u5dee\u7570\uff1f Bagging vs. Boosting Boosting vs. 
Decision Tree Boosting \u65b9\u6cd5\u6709\u54ea\u4e9b \u5be6\u4f5c XGBoost \u5206\u985e\u5668\u8207\u8ff4\u6b78\u5668 \u6bd4\u8f03 Bagging \u8207 Boosting \u5169\u8005\u5dee\u5225 \u7bc4\u4f8b\u7a0b\u5f0f XGBoost(Classification)\uff1a \u7bc4\u4f8b\u7a0b\u5f0f XGBoost(Regression)\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"15.XGBoost/#xgboost","text":"XGboost \u5168\u540d\u70ba eXtreme Gradient Boosting\uff0c\u662f\u76ee\u524d Kaggle \u7af6\u8cfd\u4e2d\u6700\u5e38\u898b\u5230\u7684\u7b97\u6cd5\uff0c\u540c\u6642\u4e5f\u662f\u591a\u6578\u5f97\u734e\u8005\u6240\u4f7f\u7528\u7684\u6a21\u578b\u3002\u6b64\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u662f\u7531\u83ef\u76db\u9813\u5927\u5b78\u535a\u58eb\u751f\u9673\u5929\u5947\u6240\u63d0\u51fa\u4f86\u7684\uff0c\u5b83\u662f\u4ee5 Gradient Boosting \u70ba\u57fa\u790e\u4e0b\u53bb\u5be6\u4f5c\uff0c\u4e26\u6dfb\u52a0\u4e00\u4e9b\u65b0\u7684\u6280\u5de7\u3002\u5b83\u53ef\u4ee5\u8aaa\u662f\u7d50\u5408 Bagging \u548c Boosting \u7684\u512a\u9ede\u3002XGboost \u4fdd\u6709 Gradient Boosting \u7684\u505a\u6cd5\uff0c\u6bcf\u4e00\u68f5\u6a39\u662f\u4e92\u76f8\u95dc\u806f\u7684\uff0c\u76ee\u6a19\u662f\u5e0c\u671b\u5f8c\u9762\u751f\u6210\u7684\u6a39\u80fd\u5920\u4fee\u6b63\u524d\u9762\u4e00\u68f5\u6a39\u72af\u932f\u7684\u5730\u65b9\u3002\u6b64\u5916 XGboost \u662f\u63a1\u7528\u7279\u5fb5\u96a8\u6a5f\u63a1\u6a23\u7684\u6280\u5de7\uff0c\u548c\u96a8\u6a5f\u68ee\u6797\u4e00\u6a23\u5728\u751f\u6210\u6bcf\u4e00\u68f5\u6a39\u7684\u6642\u5019\u96a8\u6a5f\u62bd\u53d6\u7279\u5fb5\uff0c\u56e0\u6b64\u5728\u6bcf\u68f5\u6a39\u7684\u751f\u6210\u4e2d\u4e26\u4e0d\u6703\u6bcf\u4e00\u6b21\u90fd\u62ff\u5168\u90e8\u7684\u7279\u5fb5\u53c3\u8207\u6c7a\u7b56\u3002\u6b64\u5916\u70ba\u4e86\u8b93\u6a21\u578b\u904e\u65bc\u8907\u96dc\uff0cXGboost 
\u5728\u76ee\u6a19\u51fd\u6578\u6dfb\u52a0\u4e86\u6a19\u6e96\u5316\u3002\u56e0\u70ba\u6a21\u578b\u5728\u8a13\u7df4\u6642\u70ba\u4e86\u64ec\u5408\u8a13\u7df4\u8cc7\u6599\uff0c\u6703\u7522\u751f\u5f88\u591a\u9ad8\u6b21\u9805\u7684\u51fd\u6578\uff0c\u4f46\u53cd\u800c\u5bb9\u6613\u88ab\u96dc\u8a0a\u5e72\u64fe\u5c0e\u81f4\u904e\u5ea6\u64ec\u5408\u3002\u56e0\u6b64 L1/L2 Regularization \u76ee\u7684\u662f\u8b93\u640d\u5931\u51fd\u6578\u66f4\u4f73\u5e73\u6ed1\uff0c\u4e14\u6297\u96dc\u8a0a\u5e72\u64fe\u80fd\u529b\u66f4\u5927\u3002\u6700\u5f8c XGboost \u9084\u7528\u5230\u4e86\u4e00\u968e\u5c0e\u6578\u548c\u4e8c\u968e\u5c0e\u6578\u4f86\u751f\u6210\u4e0b\u4e00\u68f5\u6a39\u3002\u5176\u4e2d Gradient \u5c31\u662f\u6240\u8b02\u7684\u4e00\u968e\u5c0e\u6578\uff0c\u800c Hessian \u5373\u70ba\u4e8c\u968e\u5c0e\u6578\u3002","title":"\u4eba\u4eba\u9a5a\u5947\u7684 XGBoost"},{"location":"15.XGBoost/#xgboost_1","text":"XGBoost \u9664\u4e86\u53ef\u4ee5\u505a\u5206\u985e\u4e5f\u80fd\u9032\u884c\u8ff4\u6b78\u9023\u7e8c\u6027\u6578\u503c\u7684\u9810\u6e2c\uff0c\u800c\u4e14\u6548\u679c\u901a\u5e38\u90fd\u4e0d\u5dee\u3002\u4e26\u900f\u904e Boosting \u6280\u5de7\u5c07\u8a31\u591a\u5f31\u6c7a\u7b56\u6a39\u96c6\u6210\u5728\u4e00\u8d77\u5f62\u6210\u4e00\u500b\u5f37\u7684\u9810\u6e2c\u6a21\u578b\u3002 \u5229\u7528\u4e86\u4e8c\u968e\u68af\u5ea6\u4f86\u5c0d\u7bc0\u9ede\u9032\u884c\u5283\u5206 \u5229\u7528\u5c40\u90e8\u8fd1\u4f3c\u7b97\u6cd5\u5c0d\u5206\u88c2\u7bc0\u9ede\u9032\u884c\u512a\u5316 \u5728\u640d\u5931\u51fd\u6578\u4e2d\u52a0\u5165\u4e86 L1/L2 \u9805\uff0c\u63a7\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6 \u63d0\u4f9b GPU \u5e73\u884c\u5316\u904b\u7b97","title":"XGBoost \u512a\u9ede"},{"location":"15.XGBoost/#bagging-vs-boosting","text":"\u5728\u9019\u88e1\u5e6b\u5927\u5bb6\u56de\u9867\u4e00\u4e0b\u6574\u9ad4\u5b78\u7fd2\u4e2d\u7684 Bagging \u8207 Boosting \u5169\u8005\u9593\u7684\u5dee\u7570\u3002\u9996\u5148 Bagging 
\u900f\u904e\u96a8\u6a5f\u62bd\u6a23\u7684\u65b9\u5f0f\u751f\u6210\u6bcf\u4e00\u68f5\u6a39\uff0c\u6700\u91cd\u8981\u7684\u662f\u6bcf\u68f5\u6a39\u5f7c\u6b64\u7368\u7acb\u4e26\u7121\u95dc\u806f\u3002\u5148\u524d\u6240\u63d0\u5230\u7684\u96a8\u6a5f\u68ee\u6797\u5c31\u662f Bagging \u7684\u5be6\u4f8b\u3002\u53e6\u5916 Boosting \u5247\u662f\u900f\u904e\u5e8f\u5217\u7684\u65b9\u5f0f\u751f\u6210\u6a39\uff0c\u5f8c\u9762\u6240\u751f\u6210\u7684\u6a39\u6703\u8207\u524d\u4e00\u68f5\u6a39\u76f8\u95dc\u3002\u672c\u7ae0\u6240\u63d0\u53ca\u7684 XGBoost \u5c31\u662f Boosting \u65b9\u6cd5\u7684\u5176\u4e2d\u4e00\u7a2e\u5be6\u4f8b\u3002\u6b63\u662f\u6bcf\u68f5\u6a39\u7684\u751f\u6210\u90fd\u6539\u5584\u4e86\u4e0a\u4e00\u68f5\u6a39\u5b78\u7fd2\u4e0d\u597d\u7684\u5730\u65b9\uff0c\u56e0\u6b64 Boosting \u7684\u6a21\u578b\u901a\u5e38\u6703\u6bd4 Bagging \u9084\u4f86\u7684\u7cbe\u6e96\u3002 Bagging \u900f\u904e\u62bd\u6a23\u7684\u65b9\u5f0f\u751f\u6210\u6a39\uff0c\u6bcf\u68f5\u6a39\u5f7c\u6b64\u7368\u7acb Boosting \u900f\u904e\u5e8f\u5217\u7684\u65b9\u5f0f\u751f\u6210\u6a39\uff0c\u5f8c\u9762\u751f\u6210\u7684\u6a39\u6703\u8207\u524d\u4e00\u68f5\u6a39\u76f8\u95dc","title":"Bagging vs. Boosting"},{"location":"15.XGBoost/#boosting-vs-decision-tree","text":"\u6211\u5011\u518d\u8207\u6700\u4e00\u958b\u59cb\u6240\u63d0\u7684\u6c7a\u7b56\u6a39\u505a\u6bd4\u8f03\u3002\u6c7a\u7b56\u6a39\u901a\u5e38\u70ba\u4e00\u68f5\u8907\u96dc\u7684\u6a39\uff0c\u800c\u5728 Boosting \u662f\u7522\u751f\u975e\u5e38\u591a\u68f5\u7684\u6a39\uff0c\u4f46\u662f\u6bcf\u4e00\u68f5\u7684\u6a39\u90fd\u5f88\u7c21\u55ae\u7684\u6c7a\u7b56\u6a39\u3002Boosting \u5e0c\u671b\u65b0\u7684\u6a39\u53ef\u4ee5\u91dd\u5c0d\u820a\u7684\u6a39\u9810\u6e2c\u4e0d\u592a\u597d\u7684\u90e8\u5206\u505a\u4e00\u4e9b\u88dc\u5f37\u3002\u6700\u7d42\u6211\u5011\u8981\u628a\u6240\u6709\u7c21\u55ae\u7684\u6a39\u5408\u518d\u4e00\u8d77\u624d\u80fd\u7576\u6700\u5f8c\u7684\u9810\u6e2c\u8f38\u51fa\u3002","title":"Boosting vs. 
Decision Tree"},{"location":"15.XGBoost/#boosting","text":"AdaBoost \u662f\u7531 Yoav Freund \u548c Robert Schapire \u65bc 1995 \u5e74\u63d0\u51fa\u3002\u6240\u8b02\u7684\u81ea\u9069\u61c9\u662f\u8868\u793a\u6839\u64da\u5f31\u5b78\u7fd2\u7684\u5b78\u7fd2\u8aa4\u5dee\u7387\u8868\u73fe\u4f86\u66f4\u65b0\u8a13\u7df4\u6a23\u672c\u7684\u6b0a\u91cd\uff0c\u7136\u5f8c\u57fa\u65bc\u8abf\u6574\u6b0a\u91cd\u5f8c\u7684\u8a13\u7df4\u96c6\u4f86\u8a13\u7df4\u7b2c\u4e8c\u500b\u5f31\u5b78\u7fd2\u5668\uff0c\u85c9\u7531\u6b64\u65b9\u6cd5\u4e0d\u65b7\u7684\u8fed\u4ee3\u4e0b\u53bb\u3002 AdaBoost\uff08Adaptive Boosting) AdaBoostClassifier AdaBoostRegressor Gradient Boosting \u7531 Friedman \u65bc 1999 \u5e74\u63d0\u51fa\u3002\u5176\u4e2d GBDT (Gradient Boosting Decision Tree) \u7684\u5f31\u5b78\u7fd2\u5668\u50c5\u9650\u65bc\u53ea\u80fd\u4f7f\u7528 CART \u6c7a\u7b56\u6a39\u6a21\u578b\uff0c\u4e26\u63a1\u7528\u52a0\u6cd5\u6a21\u578b\u7684\u524d\u5411\u5206\u6b65\u7b97\u6cd5\u4f86\u89e3\u6c7a\u5206\u985e\u548c\u8ff4\u6b78\u554f\u984c\u3002 Gradient Boosting GradientBoostingClassifier GradientBoostingRegressor \u63a5\u4e0b\u4f86\u4ecb\u7d39\u4e09\u500b\u8fd1\u5e74\u4e09\u500b\u5f37\u5927\u7684\u958b\u6e90\u6a5f\u5668\u5b78\u7fd2\u5c08\u6848\u3002\u9996\u5148 XGBoost \u6700\u521d\u662f\u7531\u9673\u5929\u5947\u65bc 2014 \u5e74 3 \u6708\u767c\u8d77\u7684\u4e00\u500b\u7814\u7a76\u9805\u76ee\uff0c\u4e26\u5728\u77ed\u6642\u9593\u5167\u6210\u70ba\u7af6\u8cfd\u4e2d\u7684\u71b1\u9580\u7684\u6a21\u578b\u3002\u63a5\u8457\u65bc 2017 \u5e74 1 \u6708\u5fae\u8edf\u767c\u5e03\u4e86\u7b2c\u4e00\u500b\u7a69\u5b9a\u7684 LightGBM \u7248\u672c\u3002\u5b83\u662f\u4e00\u500b\u57fa\u65bc Gradient Boosting \u7684\u8f15\u91cf\u7d1a\u7684\u6f14\u7b97\u6cd5\uff0c\u512a\u9ede\u5728\u65bc\u4f7f\u7528\u5c11\u91cf\u8cc7\u6e90\u3001\u66f4\u5feb\u7684\u8a13\u7df4\u6548\u7387\u5f97\u5230\u66f4\u597d\u7684\u6e96\u78ba\u5ea6\u3002\u53e6\u5916\u5728\u540c\u5e74\u7684 4 
\u6708\uff0c\u4fc4\u7f85\u65af\u7684\u4e00\u5bb6\u79d1\u6280\u516c\u53f8 Yandex \u767c\u5e03\u4e86 CatBoost \uff0c\u5176\u6838\u5fc3\u4f9d\u7136\u4f7f\u7528\u4e86 Gradient Boosting \u6280\u5de7\uff0c\u4e26\u70ba\u985e\u5225\u578b\u7684\u7279\u5fb5\u505a\u7279\u5225\u7684\u8f49\u63db\u4e26\u7522\u751f\u65b0\u7684\u6578\u503c\u578b\u7279\u5fb5\u3002 \u672a\u4f86\u5e7e\u5929\u5c07\u6703\u4ecb\u7d39 LightGBM \u8207 CatBoost \u54e6\uff01","title":"Boosting \u65b9\u6cd5\u6709\u54ea\u4e9b"},{"location":"15.XGBoost/#_2","text":"","title":"[\u7a0b\u5f0f\u5be6\u4f5c]"},{"location":"15.XGBoost/#xgboost_2","text":"Parameters: - n_estimators: \u7e3d\u5171\u8fed\u4ee3\u7684\u6b21\u6578\uff0c\u5373\u6c7a\u7b56\u6a39\u7684\u500b\u6578\u3002\u9810\u8a2d\u503c\u70ba100\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\uff0c\u9ed8\u8a8d\u503c\u70ba6\u3002 - booster: gbtree \u6a39\u6a21\u578b(\u9810\u8a2d) / gbliner \u7dda\u6027\u6a21\u578b - learning_rate: \u5b78\u7fd2\u901f\u7387\uff0c\u9810\u8a2d0.3\u3002 - gamma: \u61f2\u7f70\u9805\u4fc2\u6578\uff0c\u6307\u5b9a\u7bc0\u9ede\u5206\u88c2\u6240\u9700\u7684\u6700\u5c0f\u640d\u5931\u51fd\u6578\u4e0b\u964d\u503c\u3002 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 from xgboost import XGBClassifier # \u5efa\u7acb XGBClassifier \u6a21\u578b xgboostModel = XGBClassifier ( n_estimators = 100 , learning_rate = 0.3 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b xgboostModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = xgboostModel . 
predict ( X_train )","title":"XGBoost \u5206\u985e\u5668"},{"location":"15.XGBoost/#score","text":"\u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb score() \u76f4\u63a5\u8a08\u7b97\u6a21\u578b\u9810\u6e2c\u7684\u6e96\u78ba\u7387\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( '\u8a13\u7df4\u96c6: ' , xgboostModel . score ( X_train , y_train )) print ( '\u6e2c\u8a66\u96c6: ' , xgboostModel . score ( X_test , y_test )) \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6: 1.0 \u6e2c\u8a66\u96c6: 0.9333333333333333 \u5927\u5bb6\u53ef\u4ee5\u8a66\u8457\u8207\u524d\u5e7e\u5929\u7684\u6c7a\u7b56\u6a39\u548c\u96a8\u6a5f\u68ee\u6797\u5169\u500b\u6a21\u578b\u76f8\u6bd4\u8f03\u3002\u662f\u4e0d\u662f XGBoost \u6709\u8457\u66f4\u597d\u7684\u9810\u6e2c\u7d50\u679c\u5462\uff1f\u56e0\u70ba\u6709\u4e86 Gradient Boosting \u5b78\u7fd2\u6a5f\u5236\uff0c\u5927\u5e45\u63d0\u5347\u4e86\u9810\u6e2c\u80fd\u529b\u3002\u5728\u5b78\u7fd2\u904e\u7a0b\u4e2d\u5c07\u9810\u6e2c\u4e0d\u597d\u7684\u5730\u65b9\uff0c\u5c24\u5176\u662f\u6a58\u8272 (Versicolour) \u8207\u7da0\u8272 (Virginica) \u4ea4\u754c\u8655\u6709\u66f4\u597d\u7684\u8a55\u4f30\u80fd\u529b\u3002","title":"\u4f7f\u7528Score\u8a55\u4f30\u6a21\u578b"},{"location":"15.XGBoost/#xgboost_3","text":"Parameters: - n_estimators: \u7e3d\u5171\u8fed\u4ee3\u7684\u6b21\u6578\uff0c\u5373\u6c7a\u7b56\u6a39\u7684\u500b\u6578\u3002\u9810\u8a2d\u503c\u70ba100\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\uff0c\u9ed8\u8a8d\u503c\u70ba6\u3002 - booster: gbtree \u6a39\u6a21\u578b(\u9810\u8a2d) / gbliner \u7dda\u6027\u6a21\u578b - learning_rate: \u5b78\u7fd2\u901f\u7387\uff0c\u9810\u8a2d0.3\u3002 - gamma: \u61f2\u7f70\u9805\u4fc2\u6578\uff0c\u6307\u5b9a\u7bc0\u9ede\u5206\u88c2\u6240\u9700\u7684\u6700\u5c0f\u640d\u5931\u51fd\u6578\u4e0b\u964d\u503c\u3002 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 
- predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 import xgboost as xgb # \u5efa\u7acb XGBRegressor \u6a21\u578b xgbrModel = xgb . XGBRegressor () # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b xgbrModel . fit ( x , y ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = xgbrModel . predict ( x )","title":"XGBoost (\u8ff4\u6b78\u5668)"},{"location":"15.XGBoost/#reference","text":"XGboost\u5165\u9580\u7d93\u9a57\u5206\u4eab-\u8d85\u53c3\u6578\u89e3\u6790 \u95dc\u65bc XGBoost 20 \u500b FAQ \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"16.Stacking/","text":"[Day 16] \u6bcf\u500b\u6a21\u578b\u6211\u5168\u90fd\u8981 - \u5806\u758a\u6cd5 (Stacking) \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u4e86\u89e3 Stacking \u65b9\u6cd5 \u5806\u758a\u6cd5\u7684\u5b78\u7fd2\u6a5f\u5236\u70ba\u4f55\uff1f \u5229\u7528 Stacking \u5be6\u4f5c\u8ff4\u6b78\u5668 \u900f\u904e Stacking Regressor \u5efa\u7acb\u623f\u50f9\u9810\u6e2c\u6a21\u578b \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 \u5806\u758a\u6cd5 (Stacking) \u662f\u6574\u9ad4\u5b78\u7fd2\u4e2d\u5176\u4e2d\u4e00\u7a2e\u5be6\u4f8b\u3002\u5b83\u662f\u7d50\u5408\u8a31\u591a\u7368\u7acb\u7684\u6a21\u578b\u6240\u9810\u6e2c\u51fa\u4f86\u7684\u7d50\u679c\uff0c\u4e26\u5c07\u6bcf\u500b\u7368\u7acb\u6a21\u578b\u7684\u8f38\u51fa\u8996\u70ba\u6700\u7d42\u6a21\u578b\u9810\u6e2c\u7684\u8f38\u5165\u7279\u5fb5\uff0c\u6700\u5f8c\u518d\u8a13\u7df4\u4e00\u500b\u6700\u7d42\u6a21\u578b\u3002\u4ee5\u4e0b\u5716\u70ba\u4f8b\uff0c\u5047\u8a2d\u6211\u5011\u4e8b\u5148\u8a13\u7df4\u4e09\u500b\u57fa\u5e95\u7684\u6a21\u578b (base 
learner)\uff0c\u9019\u4e09\u500b\u6a21\u578b\u5f7c\u6b64\u4e92\u76f8\u7121\u95dc\u9023\u3002\u7531\u65bc\u6bcf\u4e00\u500b\u6a21\u578b\u6240\u8a13\u7df4\u51fa\u4f86\u7684\u9810\u6e2c\u80fd\u529b\u90fd\u4e0d\u540c\uff0c\u4e5f\u8a31\u6a21\u578b\u4e00\u5728\u67d0\u500b\u5340\u6bb5\u7684\u8cc7\u6599\u6709\u4e0d\u592a\u597d\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u800c\u6a21\u578b\u4e8c\u80fd\u88dc\u8db3\u6a21\u578b\u4e00\u9810\u6e2c\u4e0d\u597d\u7684\u5730\u65b9\u3002\u85c9\u7531\u4e0a\u8ff0\u9019\u500b\u89c0\u9ede\u6211\u5011\u5c07\u4e09\u500b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u8f38\u51fa\u96c6\u5408\u8d77\u4f86(P1\u3001P2\u3001P3)\uff0c\u5982\u679c\u662f\u5206\u985e\u554f\u984c\u53ef\u4ee5\u900f\u904e\u6295\u7968\u65b9\u5f0f\uff0c\u800c\u8ff4\u6b78\u554f\u984c\u53ef\u4ee5\u63a1\u7528\u5e73\u5747\u6cd5\u6216\u662f\u52a0\u6b0a\u5e73\u5747\u6cd5\u5c07\u6240\u6709\u7684\u9810\u6e2c\u505a\u6700\u5f8c\u8a55\u4f30\u3002\u53c8\u6216\u8005\u662f\u53ef\u4ee5\u5c07\u9019\u4e09\u500b\u8f38\u51fa\u503c\u7576\u4f5c\u662f\u65b0\u6a21\u578b\u7684\u7279\u5fb5\u518d\u4e1f\u5165\u4e00\u500b\u6a5f\u5668\u5b78\u578b\u6a21\u578b\u505a\u6700\u5f8c\u7684\u9810\u6e2c\u5f97\u5230\u6700\u7d42\u8f38\u51fa\u3002 [\u7a0b\u5f0f\u5be6\u4f5c] \u5728\u6b64\u7bc4\u4f8b\u4e2d\u6211\u5011\u900f\u904e Sklearn \u6240\u63d0\u4f9b\u7684\u6ce2\u58eb\u9813\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6\u9032\u884c Stacking \u65b9\u6cd5\u5efa\u6a21\u3002\u4e26\u89c0\u5bdf\u540c\u4e00\u7d44\u8cc7\u6599\u5728\u55ae\u4e00\u6a21\u578b\u4e0b\u9810\u6e2c\uff0c\u8207\u52a0\u5165 Stacking \u6a5f\u5236\u5f8c\u7684\u7d50\u679c\u6709\u7121\u6539\u5584\u3002 1) \u8f09\u5165\u8cc7\u6599\u96c6 \u9996\u5148\u6211\u5011\u5920\u904e Sklearn \u5957\u4ef6\u8b80\u5165\u6ce2\u58eb\u9813\u623f\u50f9\u8cc7\u6599\u96c6\uff0c\u4e26\u5c07\u8f38\u5165\u7279\u5fb5\u8207\u623f\u50f9\u5408\u4f75\u6210\u4e00\u500b DataFrame\u3002\u5728\u6b64\u8cc7\u6599\u96c6\u4e2d\u7e3d\u5171\u6709 13 
\u500b\u8f38\u5165\u7279\u5fb5\uff0c\u4ee5\u53ca\u4e00\u500b\u8f38\u51fa MEDV \u5373\u70ba\u623f\u50f9\u3002 # load boston_dataset boston_dataset = load_boston () boston = pd . DataFrame ( boston_dataset . data , columns = boston_dataset . feature_names ) boston [ 'MEDV' ] = boston_dataset . target boston 2) \u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6 \u5728\u6b64\u7bc4\u4f8b\u4e2d\u6211\u5011\u8457\u91cd\u65bc\u6bd4\u8f03\u6a21\u578b\u7684\u5dee\u7570\uff0c\u56e0\u6b64\u6c92\u6709\u6309\u7167\u6b63\u5e38\u7684\u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b\u8d70\u3002\u8cc7\u6599\u8996\u89ba\u5316\u4ee5\u53ca\u524d\u8655\u7406...\u7b49\u662f\u975e\u5e38\u91cd\u8981\u7684\u54e6\uff01\u5728\u6b64\u6b65\u9a5f\u6211\u5011\u5feb\u901f\u7269\u7684\u5c07\u4e7e\u6de8\u7684\u8cc7\u6599\u5207\u51fa\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\uff0c\u5176\u4e2d\u8a13\u7df4\u96c6 X_train \u8207 y_train \u662f\u5be6\u969b\u53c3\u8207\u884c\u8a13\u7df4\u7684\u8cc7\u6599\u3002\u800c X_test \u8207 y_test \u662f\u672a\u53c3\u8207\u8a13\u7df4\u7684\u8cc7\u6599\uff0c\u5b83\u662f\u88ab\u62ff\u4f86\u6e2c\u8a66\u8a55\u4f30\u6700\u7d42\u8a13\u7df4\u597d\u7684\u6a21\u578b\u3002 from sklearn.model_selection import train_test_split X = boston . drop ([ 'MEDV' ], axis = 1 ) . values y = boston [[ 'MEDV' ]] . values X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.1 , random_state = 42 ) print ( 'Training data shape:' , X_train . shape ) print ( 'Testing data shape:' , X_test . 
shape ) \u7531\u65bc Sklearn \u8cc7\u6599\u96c6\u63d0\u4f9b\u7684\u8cc7\u6599\u6a23\u672c\u6578\u6bd4\u8f03\u5c11\uff0c\u56e0\u6b64\u6e2c\u8a66\u96c6\u50c5\u5207\u51fa 0.1 \u7684\u8cc7\u6599\u3002 \u57f7\u884c\u7d50\u679c\uff1a Training data shape: (455, 13) Testing data shape: (51, 13) XGBoost \u6a21\u578b \u56e0\u70ba\u8981\u8207 Stacking \u505a\u4e00\u500b\u6bd4\u8f03\u3002\u56e0\u6b64\u9019\u88e1\u4f7f\u7528 XGBoost \u5148\u8a13\u7df4\u4e00\u500b\u6a21\u578b\uff0c\u4e26\u5c07\u7d50\u679c\u8207 Stacking \u505a\u6bd4\u8f03\u3002 from xgboost import XGBRegressor # \u5efa\u7acb XGBRegressor \u6a21\u578b xgboostModel = XGBRegressor () # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b xgboostModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = xgboostModel . predict ( X_train ) print ( \"\u8a13\u7df4\u96c6 Score: \" , xgboostModel . score ( X_train , y_train )) print ( \"\u6e2c\u8a66\u96c6 Score: \" , xgboostModel . score ( X_test , y_test )) \u5f9e\u9810\u6e2c\u7d50\u679c\u6211\u5011\u5148\u4f86\u67e5\u770b R2 score\uff0c\u4e00\u5207\u770b\u4f3c\u9084 ok\u3002\u4e0d\u904e\u9019\u88e1\u8981\u547c\u7c72\u5404\u4f4d\u8b80\u8005\u7d55\u4e0d\u8981\u770b R2 \u5206\u6578\u9ad8\u5c31\u9ad8\u8208\u5f97\u592a\u65e9\uff01 \u57f7\u884c\u7d50\u679c\uff1a \u8a13\u7df4\u96c6 Score: 0.9999920949016282 \u6e2c\u8a66\u96c6 Score: 0.9292786904177338 \u6211\u5011\u4f86\u770b\u4e00\u4e0b MSE \u5be6\u969b\u7b97\u4e00\u4e0b\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u8aa4\u5dee\u3002\u53ef\u4ee5\u767c\u73fe\u5f88\u660e\u986f\u7684\u904e\u5ea6\u64ec\u5408\u4e86\uff0c\u7c21\u55ae\u4f86\u8aaa\u5728\u8a13\u7df4\u96c6\u7684\u8cc7\u6599\u7b97\u51fa\u4f86\u7684 MSE \u5f88\u5c0f\uff0c\u4f46\u662f\u5728\u6e2c\u8a66\u96c6\u4e2d MSE \u9810\u6e2c\u80fd\u529b\u4e0d\u8db3\u9020\u6210\u8aa4\u5dee\u8b8a\u5927\u3002 from sklearn import metrics # \u8a13\u7df4\u96c6 MSE train_pred = xgboostModel . predict ( X_train ) mse = metrics . 
mean_squared_error ( y_train , train_pred ) print ( '\u8a13\u7df4\u96c6 MSE: ' , mse ) # \u6e2c\u8a66\u96c6 MSE test_pred = xgboostModel . predict ( X_test ) mse = metrics . mean_squared_error ( y_test , test_pred ) print ( '\u6e2c\u8a66\u96c6 MSE: ' , mse ) \u57f7\u884c\u7d50\u679c\uff1a \u8a13\u7df4\u96c6 MSE: 0.0006847746512112584 \u6e2c\u8a66\u96c6 MSE: 4.415429632025227 Stacking \u6a21\u578b Stacking \u7d50\u5408\u8a31\u591a\u5f31\u5b78\u7fd2\u5668\uff0c\u5c07\u6240\u6709\u7684\u5f31\u5b78\u7fd2\u5668\u7684\u8f38\u51fa\u7576\u4f5c\u65b0\u7684\u6a21\u578b\u7684\u8f38\u5165\u63a5\u8457\u9810\u6e2c\u6700\u7d42\u7d50\u679c\u3002\u5728\u6b64\u7bc4\u4f8b\u4e2d\u6211\u5011\u5efa\u7acb\u4e86\u56db\u7a2e\u8ff4\u6b78\u5668\uff0c\u5206\u5225\u6709\u96a8\u6a5f\u68ee\u6797\u3001\u652f\u6301\u5411\u91cf\u6a5f\u3001KNN \u8207\u6c7a\u7b56\u6a39\u3002\u6700\u7d42\u7684\u6a21\u578b\u6211\u5011\u63a1\u7528\u5169\u5c64\u96b1\u85cf\u5c64\u7684\u795e\u7d93\u7db2\u8def\u4f5c\u70ba\u6700\u5f8c\u7684\u623f\u50f9\u9810\u6e2c\u8a55\u4f30\u6a21\u578b\u3002 Parameters: - estimators: m \u500b\u5f31\u5b78\u7fd2\u5668\u3002 - final_estimator: \u96c6\u5408\u6240\u6709\u5f31\u5b78\u7fd2\u5668\u7684\u8f38\u51fa\uff0c\u8a13\u7df4\u4e00\u500b\u6700\u7d42\u9810\u6e2c\u6a21\u578b\u3002\u9810\u8a2d\u70baLogisticRegression\u3002 Attributes: - estimators_: \u67e5\u770b\u5f31\u5b78\u7fd2\u5668\u7d44\u5408\u3002 - final_estimator: \u67e5\u770b\u6700\u7d42\u6574\u5408\u8a13\u7df4\u6a21\u578b\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 from sklearn.ensemble import RandomForestRegressor from sklearn.linear_model import LinearRegression from sklearn import svm from sklearn.neighbors import KNeighborsRegressor from sklearn.tree import DecisionTreeRegressor 
from sklearn.ensemble import StackingRegressor from sklearn.neural_network import MLPRegressor estimators = [ ( 'rf' , RandomForestRegressor ( random_state = 42 )), ( 'svr' , svm . SVR ()), ( 'knn' , KNeighborsRegressor ()), ( 'dt' , DecisionTreeRegressor ( random_state = 42 )) ] clf = StackingRegressor ( estimators = estimators , final_estimator = MLPRegressor ( activation = \"relu\" , alpha = 0.1 , hidden_layer_sizes = ( 8 , 8 ), learning_rate = \"constant\" , max_iter = 2000 , random_state = 1000 ) ) clf . fit ( X_train , y_train ) print ( \"\u8a13\u7df4\u96c6 Score: \" , clf . score ( X_train , y_train )) print ( \"\u6e2c\u8a66\u96c6 Score: \" , clf . score ( X_test , y_test )) \u6211\u5011\u5148\u89c0\u5bdf\u8a13\u7df4\u5f8c\u7684 R2 score \u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u4e0a\u7684\u5206\u6578\u3002\u5f9e\u6578\u503c\u770b\u89c0\u5bdf\u53ef\u4ee5\u767c\u73fe\u900f\u904e\u5806\u758a\u6cd5\u5169\u8005\u9593\u7684\u5206\u6578\u5dee\u8ddd\u8b8a\u5c0f\u4e86\u3002 \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6 Score: 0.9608703782891547 \u6e2c\u8a66\u96c6 Score: 0.9371735287625855 from sklearn import metrics # \u8a13\u7df4\u96c6 MSE train_pred = clf . predict ( X_train ) mse = metrics . mean_squared_error ( y_train , train_pred ) print ( '\u8a13\u7df4\u96c6 MSE: ' , mse ) # \u6e2c\u8a66\u96c6 MSE test_pred = clf . predict ( X_test ) mse = metrics . 
mean_squared_error ( y_test , test_pred ) print ( '\u6e2c\u8a66\u96c6 MSE: ' , mse ) \u63a5\u8457\u6211\u5011\u4e00\u6a23\u8a08\u7b97 MSE \u5be6\u969b\u89c0\u5bdf\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u4e0a\u7684\u8aa4\u5dee\u3002\u5f9e\u8a08\u7b97\u89e3\u679c\u53ef\u4ee5\u770b\u5230\u5169\u8005\u7684\u8aa4\u5dee\u90fd\u662f\u5dee\u4e0d\u591a\u7684\u3002\u5f9e\u9019\u88e1\u6211\u5011\u5c31\u53ef\u4ee5\u5f88\u6e05\u695a\u7684\u77e5\u9053\u900f\u904e Stacking \u53ef\u4ee5\u907f\u514d\u6a21\u578b\u904e\u64ec\u5408\uff0c\u4e26\u4e14\u900f\u904e\u591a\u500b\u57fa\u5e95\u7684\u6a21\u578b\u8b93\u6700\u7d42\u9810\u6e2c\u7d50\u679c\u6709\u6bd4\u8f03\u5e73\u6ed1\u7684\u8f38\u51fa\u3002 \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6 MSE: 3.389581229598408 \u6e2c\u8a66\u96c6 MSE: 3.9225215768179433 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 16] \u6bcf\u500b\u6a21\u578b\u6211\u5168\u90fd\u8981 - \u5806\u758a\u6cd5 (Stacking)"},{"location":"16.Stacking/#day-16-stacking","text":"","title":"[Day 16] \u6bcf\u500b\u6a21\u578b\u6211\u5168\u90fd\u8981 - \u5806\u758a\u6cd5 (Stacking)"},{"location":"16.Stacking/#_1","text":"\u4e86\u89e3 Stacking \u65b9\u6cd5 \u5806\u758a\u6cd5\u7684\u5b78\u7fd2\u6a5f\u5236\u70ba\u4f55\uff1f \u5229\u7528 Stacking \u5be6\u4f5c\u8ff4\u6b78\u5668 \u900f\u904e Stacking Regressor \u5efa\u7acb\u623f\u50f9\u9810\u6e2c\u6a21\u578b \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"16.Stacking/#_2","text":"\u5806\u758a\u6cd5 (Stacking) 
\u662f\u6574\u9ad4\u5b78\u7fd2\u4e2d\u5176\u4e2d\u4e00\u7a2e\u5be6\u4f8b\u3002\u5b83\u662f\u7d50\u5408\u8a31\u591a\u7368\u7acb\u7684\u6a21\u578b\u6240\u9810\u6e2c\u51fa\u4f86\u7684\u7d50\u679c\uff0c\u4e26\u5c07\u6bcf\u500b\u7368\u7acb\u6a21\u578b\u7684\u8f38\u51fa\u8996\u70ba\u6700\u7d42\u6a21\u578b\u9810\u6e2c\u7684\u8f38\u5165\u7279\u5fb5\uff0c\u6700\u5f8c\u518d\u8a13\u7df4\u4e00\u500b\u6700\u7d42\u6a21\u578b\u3002\u4ee5\u4e0b\u5716\u70ba\u4f8b\uff0c\u5047\u8a2d\u6211\u5011\u4e8b\u5148\u8a13\u7df4\u4e09\u500b\u57fa\u5e95\u7684\u6a21\u578b (base learner)\uff0c\u9019\u4e09\u500b\u6a21\u578b\u5f7c\u6b64\u4e92\u76f8\u7121\u95dc\u9023\u3002\u7531\u65bc\u6bcf\u4e00\u500b\u6a21\u578b\u6240\u8a13\u7df4\u51fa\u4f86\u7684\u9810\u6e2c\u80fd\u529b\u90fd\u4e0d\u540c\uff0c\u4e5f\u8a31\u6a21\u578b\u4e00\u5728\u67d0\u500b\u5340\u6bb5\u7684\u8cc7\u6599\u6709\u4e0d\u592a\u597d\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u800c\u6a21\u578b\u4e8c\u80fd\u88dc\u8db3\u6a21\u578b\u4e00\u9810\u6e2c\u4e0d\u597d\u7684\u5730\u65b9\u3002\u85c9\u7531\u4e0a\u8ff0\u9019\u500b\u89c0\u9ede\u6211\u5011\u5c07\u4e09\u500b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u8f38\u51fa\u96c6\u5408\u8d77\u4f86(P1\u3001P2\u3001P3)\uff0c\u5982\u679c\u662f\u5206\u985e\u554f\u984c\u53ef\u4ee5\u900f\u904e\u6295\u7968\u65b9\u5f0f\uff0c\u800c\u8ff4\u6b78\u554f\u984c\u53ef\u4ee5\u63a1\u7528\u5e73\u5747\u6cd5\u6216\u662f\u52a0\u6b0a\u5e73\u5747\u6cd5\u5c07\u6240\u6709\u7684\u9810\u6e2c\u505a\u6700\u5f8c\u8a55\u4f30\u3002\u53c8\u6216\u8005\u662f\u53ef\u4ee5\u5c07\u9019\u4e09\u500b\u8f38\u51fa\u503c\u7576\u4f5c\u662f\u65b0\u6a21\u578b\u7684\u7279\u5fb5\u518d\u4e1f\u5165\u4e00\u500b\u6a5f\u5668\u5b78\u578b\u6a21\u578b\u505a\u6700\u5f8c\u7684\u9810\u6e2c\u5f97\u5230\u6700\u7d42\u8f38\u51fa\u3002","title":"\u524d\u8a00"},{"location":"16.Stacking/#_3","text":"\u5728\u6b64\u7bc4\u4f8b\u4e2d\u6211\u5011\u900f\u904e Sklearn \u6240\u63d0\u4f9b\u7684\u6ce2\u58eb\u9813\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6\u9032\u884c Stacking 
\u65b9\u6cd5\u5efa\u6a21\u3002\u4e26\u89c0\u5bdf\u540c\u4e00\u7d44\u8cc7\u6599\u5728\u55ae\u4e00\u6a21\u578b\u4e0b\u9810\u6e2c\uff0c\u8207\u52a0\u5165 Stacking \u6a5f\u5236\u5f8c\u7684\u7d50\u679c\u6709\u7121\u6539\u5584\u3002","title":"[\u7a0b\u5f0f\u5be6\u4f5c]"},{"location":"16.Stacking/#1","text":"\u9996\u5148\u6211\u5011\u5920\u904e Sklearn \u5957\u4ef6\u8b80\u5165\u6ce2\u58eb\u9813\u623f\u50f9\u8cc7\u6599\u96c6\uff0c\u4e26\u5c07\u8f38\u5165\u7279\u5fb5\u8207\u623f\u50f9\u5408\u4f75\u6210\u4e00\u500b DataFrame\u3002\u5728\u6b64\u8cc7\u6599\u96c6\u4e2d\u7e3d\u5171\u6709 13 \u500b\u8f38\u5165\u7279\u5fb5\uff0c\u4ee5\u53ca\u4e00\u500b\u8f38\u51fa MEDV \u5373\u70ba\u623f\u50f9\u3002 # load boston_dataset boston_dataset = load_boston () boston = pd . DataFrame ( boston_dataset . data , columns = boston_dataset . feature_names ) boston [ 'MEDV' ] = boston_dataset . target boston","title":"1) \u8f09\u5165\u8cc7\u6599\u96c6"},{"location":"16.Stacking/#2","text":"\u5728\u6b64\u7bc4\u4f8b\u4e2d\u6211\u5011\u8457\u91cd\u65bc\u6bd4\u8f03\u6a21\u578b\u7684\u5dee\u7570\uff0c\u56e0\u6b64\u6c92\u6709\u6309\u7167\u6b63\u5e38\u7684\u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b\u8d70\u3002\u8cc7\u6599\u8996\u89ba\u5316\u4ee5\u53ca\u524d\u8655\u7406...\u7b49\u662f\u975e\u5e38\u91cd\u8981\u7684\u54e6\uff01\u5728\u6b64\u6b65\u9a5f\u6211\u5011\u5feb\u901f\u7269\u7684\u5c07\u4e7e\u6de8\u7684\u8cc7\u6599\u5207\u51fa\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\uff0c\u5176\u4e2d\u8a13\u7df4\u96c6 X_train \u8207 y_train \u662f\u5be6\u969b\u53c3\u8207\u884c\u8a13\u7df4\u7684\u8cc7\u6599\u3002\u800c X_test \u8207 y_test \u662f\u672a\u53c3\u8207\u8a13\u7df4\u7684\u8cc7\u6599\uff0c\u5b83\u662f\u88ab\u62ff\u4f86\u6e2c\u8a66\u8a55\u4f30\u6700\u7d42\u8a13\u7df4\u597d\u7684\u6a21\u578b\u3002 from sklearn.model_selection import train_test_split X = boston . drop ([ 'MEDV' ], axis = 1 ) . values y = boston [[ 'MEDV' ]] . 
values X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.1 , random_state = 42 ) print ( 'Training data shape:' , X_train . shape ) print ( 'Testing data shape:' , X_test . shape ) \u7531\u65bc Sklearn \u8cc7\u6599\u96c6\u63d0\u4f9b\u7684\u8cc7\u6599\u6a23\u672c\u6578\u6bd4\u8f03\u5c11\uff0c\u56e0\u6b64\u6e2c\u8a66\u96c6\u50c5\u5207\u51fa 0.1 \u7684\u8cc7\u6599\u3002 \u57f7\u884c\u7d50\u679c\uff1a Training data shape: (455, 13) Testing data shape: (51, 13)","title":"2) \u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6"},{"location":"16.Stacking/#xgboost","text":"\u56e0\u70ba\u8981\u8207 Stacking \u505a\u4e00\u500b\u6bd4\u8f03\u3002\u56e0\u6b64\u9019\u88e1\u4f7f\u7528 XGBoost \u5148\u8a13\u7df4\u4e00\u500b\u6a21\u578b\uff0c\u4e26\u5c07\u7d50\u679c\u8207 Stacking \u505a\u6bd4\u8f03\u3002 from xgboost import XGBRegressor # \u5efa\u7acb XGBRegressor \u6a21\u578b xgboostModel = XGBRegressor () # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b xgboostModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c predicted = xgboostModel . predict ( X_train ) print ( \"\u8a13\u7df4\u96c6 Score: \" , xgboostModel . score ( X_train , y_train )) print ( \"\u6e2c\u8a66\u96c6 Score: \" , xgboostModel . 
score ( X_test , y_test )) \u5f9e\u9810\u6e2c\u7d50\u679c\u6211\u5011\u5148\u4f86\u67e5\u770b R2 score\uff0c\u4e00\u5207\u770b\u4f3c\u9084 ok\u3002\u4e0d\u904e\u9019\u88e1\u8981\u547c\u7c72\u5404\u4f4d\u8b80\u8005\u7d55\u4e0d\u8981\u770b R2 \u5206\u6578\u9ad8\u5c31\u9ad8\u8208\u5f97\u592a\u65e9\uff01 \u57f7\u884c\u7d50\u679c\uff1a \u8a13\u7df4\u96c6 Score: 0.9999920949016282 \u6e2c\u8a66\u96c6 Score: 0.9292786904177338 \u6211\u5011\u4f86\u770b\u4e00\u4e0b MSE \u5be6\u969b\u7b97\u4e00\u4e0b\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u8aa4\u5dee\u3002\u53ef\u4ee5\u767c\u73fe\u5f88\u660e\u986f\u7684\u904e\u5ea6\u64ec\u5408\u4e86\uff0c\u7c21\u55ae\u4f86\u8aaa\u5728\u8a13\u7df4\u96c6\u7684\u8cc7\u6599\u7b97\u51fa\u4f86\u7684 MSE \u5f88\u5c0f\uff0c\u4f46\u662f\u5728\u6e2c\u8a66\u96c6\u4e2d MSE \u9810\u6e2c\u80fd\u529b\u4e0d\u8db3\u9020\u6210\u8aa4\u5dee\u8b8a\u5927\u3002 from sklearn import metrics # \u8a13\u7df4\u96c6 MSE train_pred = xgboostModel . predict ( X_train ) mse = metrics . mean_squared_error ( y_train , train_pred ) print ( '\u8a13\u7df4\u96c6 MSE: ' , mse ) # \u6e2c\u8a66\u96c6 MSE test_pred = xgboostModel . predict ( X_test ) mse = metrics . 
mean_squared_error ( y_test , test_pred ) print ( '\u6e2c\u8a66\u96c6 MSE: ' , mse ) \u57f7\u884c\u7d50\u679c\uff1a \u8a13\u7df4\u96c6 MSE: 0.0006847746512112584 \u6e2c\u8a66\u96c6 MSE: 4.415429632025227","title":"XGBoost \u6a21\u578b"},{"location":"16.Stacking/#stacking","text":"Stacking \u7d50\u5408\u8a31\u591a\u5f31\u5b78\u7fd2\u5668\uff0c\u5c07\u6240\u6709\u7684\u5f31\u5b78\u7fd2\u5668\u7684\u8f38\u51fa\u7576\u4f5c\u65b0\u7684\u6a21\u578b\u7684\u8f38\u5165\u63a5\u8457\u9810\u6e2c\u6700\u7d42\u7d50\u679c\u3002\u5728\u6b64\u7bc4\u4f8b\u4e2d\u6211\u5011\u5efa\u7acb\u4e86\u56db\u7a2e\u8ff4\u6b78\u5668\uff0c\u5206\u5225\u6709\u96a8\u6a5f\u68ee\u6797\u3001\u652f\u6301\u5411\u91cf\u6a5f\u3001KNN \u8207\u6c7a\u7b56\u6a39\u3002\u6700\u7d42\u7684\u6a21\u578b\u6211\u5011\u63a1\u7528\u5169\u5c64\u96b1\u85cf\u5c64\u7684\u795e\u7d93\u7db2\u8def\u4f5c\u70ba\u6700\u5f8c\u7684\u623f\u50f9\u9810\u6e2c\u8a55\u4f30\u6a21\u578b\u3002 Parameters: - estimators: m \u500b\u5f31\u5b78\u7fd2\u5668\u3002 - final_estimator: \u96c6\u5408\u6240\u6709\u5f31\u5b78\u7fd2\u5668\u7684\u8f38\u51fa\uff0c\u8a13\u7df4\u4e00\u500b\u6700\u7d42\u9810\u6e2c\u6a21\u578b\u3002\u9810\u8a2d\u70baLogisticRegression\u3002 Attributes: - estimators_: \u67e5\u770b\u5f31\u5b78\u7fd2\u5668\u7d44\u5408\u3002 - final_estimator: \u67e5\u770b\u6700\u7d42\u6574\u5408\u8a13\u7df4\u6a21\u578b\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 from sklearn.ensemble import RandomForestRegressor from sklearn.linear_model import LinearRegression from sklearn import svm from sklearn.neighbors import KNeighborsRegressor from sklearn.tree import DecisionTreeRegressor from sklearn.ensemble import StackingRegressor from sklearn.neural_network import MLPRegressor estimators = [ ( 'rf' , 
RandomForestRegressor ( random_state = 42 )), ( 'svr' , svm . SVR ()), ( 'knn' , KNeighborsRegressor ()), ( 'dt' , DecisionTreeRegressor ( random_state = 42 )) ] clf = StackingRegressor ( estimators = estimators , final_estimator = MLPRegressor ( activation = \"relu\" , alpha = 0.1 , hidden_layer_sizes = ( 8 , 8 ), learning_rate = \"constant\" , max_iter = 2000 , random_state = 1000 ) ) clf . fit ( X_train , y_train ) print ( \"\u8a13\u7df4\u96c6 Score: \" , clf . score ( X_train , y_train )) print ( \"\u6e2c\u8a66\u96c6 Score: \" , clf . score ( X_test , y_test )) \u6211\u5011\u5148\u89c0\u5bdf\u8a13\u7df4\u5f8c\u7684 R2 score \u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u4e0a\u7684\u5206\u6578\u3002\u5f9e\u6578\u503c\u4e0a\u89c0\u5bdf\u53ef\u4ee5\u767c\u73fe\u900f\u904e\u5806\u758a\u6cd5\u5169\u8005\u9593\u7684\u5206\u6578\u5dee\u8ddd\u8b8a\u5c0f\u4e86\u3002 \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6 Score: 0.9608703782891547 \u6e2c\u8a66\u96c6 Score: 0.9371735287625855 from sklearn import metrics # \u8a13\u7df4\u96c6 MSE train_pred = clf . predict ( X_train ) mse = metrics . mean_squared_error ( y_train , train_pred ) print ( '\u8a13\u7df4\u96c6 MSE: ' , mse ) # \u6e2c\u8a66\u96c6 MSE test_pred = clf . predict ( X_test ) mse = metrics . 
mean_squared_error ( y_test , test_pred ) print ( '\u6e2c\u8a66\u96c6 MSE: ' , mse ) \u63a5\u8457\u6211\u5011\u4e00\u6a23\u8a08\u7b97 MSE \u5be6\u969b\u89c0\u5bdf\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u4e0a\u7684\u8aa4\u5dee\u3002\u5f9e\u8a08\u7b97\u7d50\u679c\u53ef\u4ee5\u770b\u5230\u5169\u8005\u7684\u8aa4\u5dee\u90fd\u662f\u5dee\u4e0d\u591a\u7684\u3002\u5f9e\u9019\u88e1\u6211\u5011\u5c31\u53ef\u4ee5\u5f88\u6e05\u695a\u7684\u77e5\u9053\u900f\u904e Stacking \u53ef\u4ee5\u907f\u514d\u6a21\u578b\u904e\u64ec\u5408\uff0c\u4e26\u4e14\u900f\u904e\u591a\u500b\u57fa\u5e95\u7684\u6a21\u578b\u8b93\u6700\u7d42\u9810\u6e2c\u7d50\u679c\u6709\u6bd4\u8f03\u5e73\u6ed1\u7684\u8f38\u51fa\u3002 \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6 MSE: 3.389581229598408 \u6e2c\u8a66\u96c6 MSE: 3.9225215768179433 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Stacking \u6a21\u578b"},{"location":"17.LightGBM/","text":"[Day 17] \u8f15\u91cf\u5316\u7684\u68af\u5ea6\u63d0\u5347\u6a5f - LightGBM \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 LightGBM \u8207 XGBoost \u6bd4\u8f03 \u4e86\u89e3 LightGBM \u512a\u9ede \u5be6\u4f5c LightGBM \u8655\u7406\u8cc7\u6599\u4e0d\u5e73\u8861\u8cc7\u6599 \u4fe1\u7528\u5361\u76dc\u5237\u5075\u6e2c (\u4e8c\u5143\u5206\u985e) \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 LightGBM \u662f\u5c6c\u65bc GBDT \u5bb6\u65cf\u4e2d\u6210\u54e1\u4e4b\u4e00\uff0c\u76f8\u8f03\u65bc\u5148\u524d\u4ecb\u7d39\u7684 XGBoost \u5169\u8005\u53ef\u4ee5\u62ff\u4f86\u505a\u6bd4\u8f03\u3002\u7c21\u55ae\u4f86\u8aaa\u5f9e LightGBM \u540d\u5b57\u4e0a\u89c0\u5bdf\uff0c\u6211\u5011\u53ef\u4ee5\u770b\u51fa\u5b83\u662f\u8f15\u91cf\u5316 (Light) \u7684\u68af\u5ea6\u63d0\u5347\u6a5f (GBM) \u7684\u5be6\u4f8b\u3002\u5176\u76f8\u5c0d XGBoost 
\u4f86\u8aaa\u5b83\u5177\u6709\u8a13\u7df4\u901f\u5ea6\u5feb\u3001\u8a18\u61b6\u9ad4\u4f54\u7528\u4f4e\u7684\u7279\u9ede\uff0c\u56e0\u6b64\u8fd1\u5e7e\u5e74 LightGBM \u5728 Kaggle \u4e0a\u4e5f\u7b97\u662f\u71b1\u9580\u6a21\u578b\u4e4b\u4e00\u3002 LightGBM \u8207 XGBoost \u6bd4\u8f03 \u9019\u5169\u7a2e\u6f14\u7b97\u6cd5\u90fd\u4f7f\u7528\u8caa\u5a6a\u7684\u65b9\u6cd5\u4f86\u6700\u5c0f\u5316\u640d\u5931\u51fd\u6578\u7684\u68af\u5ea6\u4f86\u69cb\u5efa\u6240\u6709\u7684\u5f31\u5b78\u7fd2\u5668\u3002\u5176 tree-based \u6f14\u7b97\u6cd5\u6240\u9762\u81e8\u7684\u6311\u6230\u662f\u5982\u4f55\u6311\u9078\u6700\u4f73\u7684\u8449\u7bc0\u9ede\u7684\u5207\u5272\u65b9\u5f0f\uff0c\u7136\u800c LightGBM \u548c XGBoost \u5206\u5225\u4f7f\u7528\u4e0d\u540c\u7684\u512a\u5316\u6280\u8853\u8207\u65b9\u6cd5\u4f86\u8b58\u5225\u6700\u4f73\u7684\u5206\u5272\u9ede\u3002 LightGBM \u512a\u9ede LightGBM \u7531\u5fae\u8edf\u5718\u968a\u65bc 2017 \u5e74\u6240\u767c\u8868\u7684\u8ad6\u6587 LightGBM: A Highly Efficient Gradient Boosting Decision Tree \u88ab\u63d0\u51fa\u3002\u5176\u4e3b\u8981\u60f3\u6cd5\u662f\u5229\u7528\u6c7a\u7b56\u6a39\u70ba\u57fa\u5e95\u7684\u5f31\u5b78\u7fd2\u5668\uff0c\u4e0d\u65b7\u5730\u8fed\u4ee3\u8a13\u7df4\u4e26\u53d6\u5f97\u6700\u4f73\u7684\u6a21\u578b\u3002\u540c\u6642\u8a72\u6f14\u7b97\u6cd5\u9032\u884c\u4e86\u512a\u5316\u4f7f\u5f97\u8a13\u7df4\u901f\u5ea6\u8b8a\u5feb\uff0c\u4e26\u4e14\u6709\u6548\u964d\u4f4e\u88ab\u6d88\u8017\u7684\u8cc7\u6e90\u3002LightGBM \u4e5f\u662f\u500b\u958b\u6e90\u5c08\u6848\u5927\u5bb6\u53ef\u4ee5\u5728 GitHub \u4e0a\u53d6\u5f97\u76f8\u95dc\u8cc7\u8a0a\u3002 \u5728\u5b98\u65b9\u7684\u6587\u4ef6\u4e2d\u4e5f\u689d\u5217\u4e86\u5e7e\u500b LightGBM \u7684\u512a\u9ede\uff1a - \u66f4\u5feb\u7684\u8a13\u7df4\u901f\u5ea6\u548c\u66f4\u9ad8\u7684\u6548\u7387 - \u4f4e\u8a18\u61b6\u9ad4\u4f7f\u7528\u7387 - \u66f4\u597d\u7684\u6e96\u78ba\u5ea6 - \u652f\u63f4 GPU \u5e73\u884c\u904b\u7b97 - \u80fd\u5920\u8655\u7406\u5927\u898f\u6a21\u6578\u64da LightGBM 
\u4f7f\u7528 leaf-wise tree \u6f14\u7b97\u6cd5\uff0c\u56e0\u6b64\u5728\u8fed\u4ee3\u904e\u7a0b\u4e2d\u80fd\u66f4\u5feb\u5730\u6536\u6582\u3002\u4f46\u662f leaf-wise tree \u65b9\u6cd5\u8f03\u5bb9\u6613\u904e\u64ec\u5408\u3002\u8a73\u7d30\u7684\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u6587\u7ae0\u6700\u5f8c\u63d0\u4f9b\u7684\u76f8\u95dc\u8cc7\u6e90\u3002 \u8655\u7406 unbalance \u8cc7\u6599 \u5728\u4f7f\u7528 LightGBM \u505a\u5206\u985e\u5668\u6642\u8a72\u5982\u4f55\u8655\u7406\u6a23\u672c\u985e\u5225\u5206\u4f48\u4e0d\u5e73\u8861\u7684\u554f\u984c\uff1f\u4e00\u500b\u7c21\u55ae\u7684\u65b9\u6cd5\u662f\u8a2d\u5b9a is_unbalance=True \uff0c\u6216\u662f scale_pos_weight \u6ce8\u610f\u9019\u5169\u500b\u53c3\u6578\u53ea\u80fd\u64c7\u4e00\u4f7f\u7528\u3002\u4ee5\u4e0b\u6211\u5011\u5c31\u4f7f\u7528\u4e00\u500b\u4e0d\u5e73\u8861\u7684\u8cc7\u6599\u96c6\uff0c\u4fe1\u7528\u5361\u76dc\u5237\u9810\u6e2c\u4f86\u505a\u793a\u7bc4\u3002\u9996\u5148\u6211\u5011\u53ef\u4ee5\u8f09\u5165 Google \u6240\u63d0\u4f9b\u7684\u4fe1\u7528\u5361\u76dc\u5237\u8cc7\u6599\u96c6\uff0c\u8a73\u7d30\u8cc7\u8a0a\u53ef\u4ee5\u53c3\u8003 \u9019\u88e1 \u3002 import pandas as pd raw_df = pd . read_csv ( 'https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv' ) X = raw_df . drop ( columns = [ 'Class' ]) y = raw_df [ 'Class' ] print ( 'X:' , X . shape ) print ( 'Y:' , y . 
shape ) \u8f09\u5165\u6210\u529f\u5f8c\u6211\u5011\u53ef\u4ee5\u770b\u5230\u8a72\u8cc7\u6599\u96c6\u5171\u6709 284807 \u7b46\u8cc7\u6599\uff0c\u6bcf\u4e00\u7b46\u8cc7\u6599\u6709 30 \u500b\u7279\u5fb5\u3002 X: (284807, 30) Y: (284807,) \u70ba\u4e86\u65b9\u4fbf\u6aa2\u8996\u5be6\u9a57\u7d50\u679c\uff0c\u6211\u5011\u4f9d\u7167 y \u7684\u6bd4\u4f8b\u9032\u884c\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u5207\u5272\u3002\u9019\u88e1\u503c\u5f97\u4e00\u63d0\u7684\u662f\uff0c stratify \u70ba\u5206\u5c64\u96a8\u6a5f\u62bd\u6a23\u3002\u7279\u5225\u662f\u5728\u539f\u59cb\u6578\u64da\u4e2d\u6a23\u672c\u6a19\u7c64\u5206\u4f48\u4e0d\u5747\u8861\u6642\u975e\u5e38\u6709\u7528\uff0c\u4e00\u4e9b\u5206\u985e\u554f\u984c\u53ef\u80fd\u6703\u5728\u76ee\u6a19\u985e\u7684\u5206\u4f48\u4e2d\u8868\u73fe\u51fa\u5f88\u5927\u7684\u4e0d\u5e73\u8861\u6642\u4f8b\u5982\uff1a\u8ca0\u6a23\u672c\u53ef\u80fd\u6bd4\u6b63\u6a23\u672c\u591a\u5e7e\u500d\u3002\u5728\u9019\u7a2e\u60c5\u6cc1\u4e0b\uff0c\u5efa\u8b70\u4f7f\u7528\u5206\u5c64\u62bd\u6a23\u3002 from sklearn.model_selection import train_test_split X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.3 , random_state = 42 , stratify = y ) print ( 'X_train:' , X_train . shape ) print ( 'X_test:' , X_test . 
shape ) \u8f38\u51fa\u7d50\u679c\uff1a X_train: (199364, 30) X_test: (85443, 30) \u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7d93\u7531 7:3 \u7684\u6bd4\u4f8b\u4e0b\u53bb\u96a8\u6a5f\u5207\u5272\u8cc7\u6599\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e Pandas \u505a\u66f4\u8fd1\u4e00\u6b65\u7684\u5206\u6790\uff0c\u53ef\u4ee5\u767c\u73fe\u5207\u5272\u51fa\u4f86\u7684\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u5728\u76dc\u5237(1)\u8207\u975e\u76dc\u5237(0)\u7684\u8cc7\u6599\u6bd4\u4f8b\u662f\u5dee\u4e0d\u591a\u7684\u3002 \u63a5\u4e0b\u4f86\u91cd\u982d\u6232\u51fa\u5834\u3002\u6211\u5011\u63a1\u7528 LightGBM \u5206\u985e\u5668\uff0c\u82e5\u9084\u6c92\u5b89\u88dd\u7684\u8b80\u8005\u53ef\u4ee5\u53c3\u8003\u4ee5\u4e0b\u6307\u4ee4\u9032\u884c\u5b89\u88dd\u3002 pip install lightgbm \u5b89\u88dd\u7d50\u675f\u5f8c\u5373\u53ef\u8f09\u5165 lightgbm \u5957\u4ef6\u4e26\u9078\u7528 LGBMClassifier \u5206\u985e\u5668\u3002\u53e6\u5916\u6211\u5011\u53ef\u4ee5\u5728\u5efa\u7acb\u5206\u985e\u5668\u540c\u6642\u8a2d\u5b9a\u6a21\u578b\u8d85\u53c3\u6578\uff0c\u9019\u88e1\u6211\u5011\u4f86\u793a\u7bc4\u4f7f\u7528 is_unbalance=True \u8a13\u7df4\u6a21\u578b\u3002\u9664\u6b64\u4e4b\u5916\u6a21\u578b\u7684\u8d85\u53c3\u6578\u6709\u5f88\u591a\uff0c\u53ef\u4ee5\u7531 \u5b98\u65b9 \u6587\u4ef6\u4e2d\u67e5\u95b1\u3002\u4ee5\u4e0b\u5e6b\u5404\u4f4d\u6574\u7406\u5e38\u7528\u7684\u65b9\u6cd5\uff1a Parameters: - num_iterations: \u7e3d\u5171\u8fed\u4ee3\u7684\u6b21\u6578\uff0c\u5373\u6c7a\u7b56\u6a39\u7684\u500b\u6578\u3002\u9810\u8a2d\u503c\u70ba100\u3002 - learning_rate: \u5b78\u7fd2\u901f\u7387\uff0c\u9810\u8a2d0.1\u3002 - boosting: \u9078\u64c7 boosting \u7a2e\u985e\u3002\u5171\u56db\u7a2e gbdt\u3001rf\u3001dart\u3001goss\uff0c\u9810\u8a2d\u70ba gbdt\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\uff0c\u9810\u8a2d\u503c\u70ba-1\u5373\u8868\u793a\u7121\u9650\u5236\u3002 - min_data_in_leaf: 
\u4e00\u500b\u5b50\u8449\u4e2d\u6700\u5c11\u6578\u64da\uff0c\u53ef\u7528\u65bc\u8655\u7406\u904e\u64ec\u5408\u3002\u9810\u8a2d20\u7b46\u3002 - max_bin: \u5c07\u7279\u5fb5\u503c\u653e\u5165\u6876\u4e2d\u7684\u6700\u5927bins\u6578\u3002\u9810\u8a2d\u70ba255\u3002 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 import lightgbm as lgb model = lgb . LGBMClassifier ( is_unbalance = True ) model . fit ( X_train , y_train ) \u8a13\u7df4\u7d50\u675f\u5f8c\u5373\u53ef\u4f7f\u7528\u525b\u5207\u5272\u597d\u7684\u6e2c\u8a66\u96c6\u9032\u884c\u6a21\u578b\u8a55\u4f30\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u6e96\u78ba\u7387\u9ad8\u9054 94%\u3002 from sklearn.metrics import accuracy_score pred = model . predict ( X_test ) print ( \"Accuracy:\" , accuracy_score ( y_test , pred )) \u8f38\u51fa\u7d50\u679c\uff1a Accuracy: 0.9401706400758401 \u5982\u679c\u8981\u5224\u65b7\u5206\u985e\u5668\u7684\u597d\u58de\uff0c\u50c5\u4f7f\u7528\u6e96\u78ba\u7387\u4f86\u8a55\u4f30\u662f\u4e00\u500b\u4e0d\u597d\u7684\u7fd2\u6163\u3002\u6211\u5011\u61c9\u8a72\u5584\u7528\u6df7\u6dc6\u77e9\u9663\u505a\u66f4\u8fd1\u4e00\u6b65\u7684\u5206\u6790\uff0c\u4e26\u67e5\u770b\u6b63\u6a23\u672c\u8207\u8ca0\u6a23\u672c\u5728\u9810\u6e2c\u4e0a\u7684\u8868\u73fe\u3002\u9996\u5148\u6211\u5011\u5148\u4f86\u5beb\u4e00\u500b\u8a08\u7b97\u6df7\u6dc6\u77e9\u9663\u7684\u51fd\u5f0f\uff0c\u4e26\u7528 seaborn \u7e6a\u88fd\u51fa\u71b1\u529b\u5716\u77e9\u9663\u3002 import seaborn as sns import matplotlib.pyplot as plt def plot_confusion_matrix ( actual_val , pred_val , title = None ): confusion_matrix = pd . 
crosstab ( actual_val , pred_val , rownames = [ 'Actual' ], colnames = [ 'Predicted' ]) plot = sns . heatmap ( confusion_matrix , annot = True , fmt = ',.0f' ) if title is None : pass else : plot . set_title ( title ) plt . show () \u5728\u8a55\u4f30\u6a21\u578b\u4e4b\u524d\u6211\u5011\u5148\u4f86\u67e5\u770b\u6e2c\u8a66\u96c6\u8f38\u51fa y \u7684\u5206\u4f48\u5404\u662f\u591a\u5c11\u3002\u900f\u904e numpy \u7684 unique \u65b9\u6cd5\u53ef\u4ee5\u8a08\u7b97 y_test \u4e2d\u6bcf\u500b\u985e\u5225\u7684\u6578\u91cf\u3002\u5f9e\u8f38\u51fa\u7d50\u679c\u53ef\u4ee5\u5f97\u77e5\uff0c85443 \u7b46\u6e2c\u8a66\u96c6\u4e2d\u5171\u6709 85295 \u7b46\u662f\u6a19\u7c64 0(\u672a\u76dc\u5237)\u3001148 \u7b46\u662f\u6a19\u7c64 1(\u76dc\u5237)\u3002\u77e5\u9053\u9019\u4e9b\u771f\u5be6\u6578\u64da\u7684\u6578\u91cf\u5f8c\uff0c\u63a5\u4e0b\u4f86\u6211\u5011\u5c31\u53ef\u4ee5\u900f\u904e\u6df7\u6dc6\u77e9\u9663\u4f86\u67e5\u770b\u6a21\u578b\u662f\u5426\u6709\u5c07\u9019\u4e9b\u76dc\u5237\u7684\u8cc7\u6599\u88ab\u6b63\u78ba\u9810\u6e2c\u51fa\u4f86\u3002 import numpy as np unique , counts = np . 
unique ( y_test , return_counts = True ) dict ( zip ( unique , counts )) \u8f38\u51fa\u7d50\u679c\uff1a { 0 : 85295 , 1 : 148 } plot_confusion_matrix \u51fd\u5f0f\u5efa\u7acb\u5b8c\u6210\u5f8c\u5373\u53ef\u547c\u53eb\u3002\u6b64\u51fd\u5f0f\u6709\u4e09\u500b\u8f38\u5165\uff0c\u5206\u5225\u70ba y_test \u5be6\u969b\u8f38\u51fa\u7b54\u6848\u3001 pred \u6a21\u578b\u9810\u6e2c\u7d50\u679c\u3001title \u5716\u8868\u6a19\u984c(\u9810\u8a2dNone)\u3002\u76f8\u5c0d\u61c9\u7684\u8b8a\u6578\u8f38\u5165\u5f8c\u5373\u53ef\u5f97\u5230\u8a08\u7b97\u597d\u7684\u6df7\u6dc6\u77e9\u9663\u3002 plot_confusion_matrix ( y_test , pred ) \u4e0b\u5716\u70ba\u5be6\u969b is_unbalance=True \u7684\u8a13\u7df4\u7d50\u679c\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u5728\u6e2c\u8a66\u96c6\u4e2d\u6709 148 \u7b46\u76dc\u5237\u8cc7\u6599\uff0c\u5176\u4e2d\u6709 124 \u7b46\u76dc\u5237\u88ab\u6210\u529f\u8fa8\u8b58\u51fa\u4f86\u3002\u53e6\u5916\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u771f\u5be6\u7b54\u6848\u662f\u6c92\u76dc\u5237\u7684\u8cc7\u6599\u5c45\u7136\u6709 5088 \u7b46\u88ab\u8aa4\u5224\u6210\u76dc\u5237\u3002 \u6211\u5011\u518d\u4f86\u8a66\u8a66\u5c07 is_unbalance \u8a2d\u70ba False \u4e26\u89c0\u5bdf\u6df7\u6dc6\u77e9\u9663\u3002\u53ef\u4ee5\u767c\u73fe\u96d6\u7136\u8aa4\u5224\u7684\u6578\u91cf\u6e1b\u5c11\u4e86\uff0c\u4f46\u662f\u771f\u5be6\u7b54\u6848\u4e2d\u6709 148 \u7b46\u76dc\u5237\u8cc7\u6599\u50c5\u6709 88 \u7b46\u88ab\u6210\u529f\u8fa8\u8b58\u51fa\u4f86\u3002\u6211\u5011\u53ef\u4ee5\u731c\u60f3\u6a21\u578b\u5728\u5927\u591a\u6578\u72c0\u6cc1\u90fd\u6703\u9810\u6e2c\u8cc7\u6599\u672a\u88ab\u76dc\u5237\u7684\u6a5f\u7387\u8f03\u5927\u3002 Reference \u7d42\u65bc\u6709\u4eba\u628aXGBoost \u548c LightGBM \u8b1b\u660e\u767d\u4e86\uff0c\u9805\u76ee\u4e2d\u6700\u4e3b\u6d41\u7684\u96c6\u6210\u6f14\u7b97\u6cd5\uff01 Lightgbm\u57fa\u672c\u539f\u7406\u4ecb\u7d39 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub 
\u53d6\u5f97\uff01","title":"[Day 17] \u8f15\u91cf\u5316\u7684\u68af\u5ea6\u63d0\u5347\u6a5f - LightGBM"},{"location":"17.LightGBM/#day-17-lightgbm","text":"","title":"[Day 17] \u8f15\u91cf\u5316\u7684\u68af\u5ea6\u63d0\u5347\u6a5f - LightGBM"},{"location":"17.LightGBM/#_1","text":"LightGBM \u8207 XGBoost \u6bd4\u8f03 \u4e86\u89e3 LightGBM \u512a\u9ede \u5be6\u4f5c LightGBM \u8655\u7406\u8cc7\u6599\u4e0d\u5e73\u8861\u8cc7\u6599 \u4fe1\u7528\u5361\u76dc\u5237\u5075\u6e2c (\u4e8c\u5143\u5206\u985e) \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"17.LightGBM/#_2","text":"LightGBM \u662f\u5c6c\u65bc GBDT \u5bb6\u65cf\u4e2d\u6210\u54e1\u4e4b\u4e00\uff0c\u76f8\u8f03\u65bc\u5148\u524d\u4ecb\u7d39\u7684 XGBoost \u5169\u8005\u53ef\u4ee5\u62ff\u4f86\u505a\u6bd4\u8f03\u3002\u7c21\u55ae\u4f86\u8aaa\u5f9e LightGBM \u540d\u5b57\u4e0a\u89c0\u5bdf\uff0c\u6211\u5011\u53ef\u4ee5\u770b\u51fa\u5b83\u662f\u8f15\u91cf\u5316 (Light) \u7684\u68af\u5ea6\u63d0\u5347\u6a5f (GBM) \u7684\u5be6\u4f8b\u3002\u5176\u76f8\u5c0d XGBoost \u4f86\u8aaa\u5b83\u5177\u6709\u8a13\u7df4\u901f\u5ea6\u5feb\u3001\u8a18\u61b6\u9ad4\u4f54\u7528\u4f4e\u7684\u7279\u9ede\uff0c\u56e0\u6b64\u8fd1\u5e7e\u5e74 LightGBM \u5728 Kaggle \u4e0a\u4e5f\u7b97\u662f\u71b1\u9580\u6a21\u578b\u4e00\u3002","title":"\u524d\u8a00"},{"location":"17.LightGBM/#lightgbm-xgboost","text":"\u9019\u5169\u7a2e\u6f14\u7b97\u6cd5\u90fd\u4f7f\u7528\u8caa\u5a6a\u7684\u65b9\u6cd5\u4f86\u6700\u5c0f\u5316\u640d\u5931\u51fd\u6578\u7684\u68af\u5ea6\u4f86\u69cb\u5efa\u6240\u6709\u7684\u5f31\u5b78\u7fd2\u5668\u3002\u5176 tree-based \u6f14\u7b97\u6cd5\u6240\u9762\u81e8\u7684\u6311\u6230\u662f\u5982\u4f55\u6311\u9078\u6700\u4f73\u7684\u8449\u7bc0\u9ede\u7684\u5207\u5272\u65b9\u5f0f\uff0c\u7136\u800c LightGBM \u548c XGBoost \u5206\u5225\u4f7f\u7528\u4e0d\u540c\u7684\u512a\u5316\u6280\u8853\u8207\u65b9\u6cd5\u4f86\u8b58\u5225\u6700\u4f73\u7684\u5206\u5272\u9ede\u3002","title":"LightGBM \u8207 XGBoost 
\u6bd4\u8f03"},{"location":"17.LightGBM/#lightgbm","text":"LightGBM \u7531\u5fae\u8edf\u5718\u968a\u65bc 2017 \u5e74\u6240\u767c\u8868\u7684\u8ad6\u6587 LightGBM: A Highly Efficient Gradient Boosting Decision Tree \u88ab\u63d0\u51fa\u3002\u5176\u4e3b\u8981\u60f3\u6cd5\u662f\u5229\u7528\u6c7a\u7b56\u6a39\u70ba\u57fa\u5e95\u7684\u5f31\u5b78\u7fd2\u5668\uff0c\u4e0d\u65b7\u5730\u8fed\u4ee3\u8a13\u7df4\u4e26\u53d6\u5f97\u6700\u4f73\u7684\u6a21\u578b\u3002\u540c\u6642\u8a72\u6f14\u7b97\u6cd5\u9032\u884c\u4e86\u512a\u5316\u4f7f\u5f97\u8a13\u7df4\u901f\u5ea6\u8b8a\u5feb\uff0c\u4e26\u4e14\u6709\u6548\u964d\u88ab\u6d88\u8017\u7684\u8cc7\u6e90\u3002LightGBM \u4e5f\u662f\u500b\u958b\u6e90\u5c08\u6848\u5927\u5bb6\u53ef\u4ee5\u5728 GitHub \u4e0a\u53ef\u4ee5\u53d6\u5f97\u76f8\u95dc\u8cc7\u8a0a\u3002 \u5728\u5b98\u65b9\u7684\u6587\u4ef6\u4e2d\u4e5f\u689d\u5217\u4e86\u5e7e\u500b LightGBM \u7684\u512a\u9ede\uff1a - \u66f4\u5feb\u7684\u8a13\u7df4\u901f\u5ea6\u548c\u66f4\u9ad8\u7684\u6548\u7387 - \u4f4e\u8a18\u61b6\u9ad4\u4f7f\u7528\u7387 - \u66f4\u597d\u7684\u6e96\u78ba\u5ea6 - \u652f\u63f4 GPU \u5e73\u884c\u904b\u7b97 - \u80fd\u5920\u8655\u7406\u5927\u898f\u6a21\u6578\u64da LightGBM \u4f7f\u7528 leaf-wise tree \u6f14\u7b97\u6cd5\uff0c\u56e0\u6b64\u5728\u8fed\u4ee3\u904e\u7a0b\u4e2d\u80fd\u66f4\u5feb\u5730\u6536\u6582\u3002\u4f46\u662f leaf-wise tree \u65b9\u6cd5\u8f03\u5bb9\u6613\u904e\u64ec\u5408\u3002\u8a73\u7d30\u7684\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u6587\u7ae0\u6700\u5f8c\u63d0\u4f9b\u7684\u76f8\u95dc\u8cc7\u6e90\u3002","title":"LightGBM \u512a\u9ede"},{"location":"17.LightGBM/#unbalance","text":"\u5728\u4f7f\u7528 LightGBM \u505a\u5206\u985e\u5668\u6642\u8a72\u5982\u4f55\u8655\u7406\u6a23\u672c\u985e\u5225\u5206\u4f48\u4e0d\u5e73\u8861\u7684\u554f\u984c\uff1f\u4e00\u500b\u7c21\u55ae\u7684\u65b9\u6cd5\u662f\u8a2d\u5b9a is_unbalance=True \uff0c\u6216\u662f scale_pos_weight 
\u6ce8\u610f\u9019\u5169\u500b\u53c3\u6578\u53ea\u80fd\u64c7\u4e00\u4f7f\u7528\u3002\u4ee5\u4e0b\u6211\u5011\u5c31\u4f7f\u7528\u4e00\u500b\u4e0d\u5e73\u8861\u7684\u8cc7\u6599\u96c6\uff0c\u4fe1\u7528\u5361\u76dc\u5237\u9810\u6e2c\u4f86\u505a\u793a\u7bc4\u3002\u9996\u5148\u6211\u5011\u53ef\u4ee5\u8f09\u5165 Google \u6240\u63d0\u4f9b\u7684\u4fe1\u7528\u5361\u76dc\u5237\u8cc7\u6599\u96c6\uff0c\u8a73\u7d30\u8cc7\u8a0a\u53ef\u4ee5\u53c3\u8003 \u9019\u88e1 \u3002 import pandas as pd raw_df = pd . read_csv ( 'https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv' ) X = raw_df . drop ( columns = [ 'Class' ]) y = raw_df [ 'Class' ] print ( 'X:' , X . shape ) print ( 'Y:' , y . shape ) \u8f09\u5165\u6210\u529f\u5f8c\u6211\u5011\u53ef\u4ee5\u770b\u5230\u8a72\u8cc7\u6599\u96c6\u5171\u6709 284807 \u7b46\u8cc7\u6599\uff0c\u6bcf\u4e00\u7b46\u8cc7\u6599\u6709 30 \u500b\u7279\u5fb5\u3002 X: (284807, 30) Y: (284807,) \u70ba\u4e86\u65b9\u4fbf\u6aa2\u8996\u5be6\u9a57\u7d50\u679c\uff0c\u6211\u5011\u4f9d\u7167 y \u7684\u6bd4\u4f8b\u9032\u884c\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u5207\u5272\u3002\u9019\u88e1\u503c\u5f97\u4e00\u63d0\u7684\u662f\uff0c stratify \u70ba\u5206\u5c64\u96a8\u6a5f\u62bd\u6a23\u3002\u7279\u5225\u662f\u5728\u539f\u59cb\u6578\u64da\u4e2d\u6a23\u672c\u6a19\u7c64\u5206\u4f48\u4e0d\u5747\u8861\u6642\u975e\u5e38\u6709\u7528\uff0c\u4e00\u4e9b\u5206\u985e\u554f\u984c\u53ef\u80fd\u6703\u5728\u76ee\u6a19\u985e\u7684\u5206\u4f48\u4e2d\u8868\u73fe\u51fa\u5f88\u5927\u7684\u4e0d\u5e73\u8861\u6642\u4f8b\u5982\uff1a\u8ca0\u6a23\u672c\u53ef\u80fd\u6bd4\u6b63\u6a23\u672c\u591a\u5e7e\u500d\u3002\u5728\u9019\u7a2e\u60c5\u6cc1\u4e0b\uff0c\u5efa\u8b70\u4f7f\u7528\u5206\u5c64\u62bd\u6a23\u3002 from sklearn.model_selection import train_test_split X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.3 , random_state = 42 , stratify = y ) print ( 'X_train:' , X_train . shape ) print ( 'X_test:' , X_test . 
shape ) \u8f38\u51fa\u7d50\u679c\uff1a X_train: (199364, 30) X_test: (85443, 30) \u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7d93\u7531 7:3 \u7684\u6bd4\u4f8b\u4e0b\u53bb\u96a8\u6a5f\u5207\u5272\u8cc7\u6599\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e Pandas \u505a\u66f4\u8fd1\u4e00\u6b65\u7684\u5206\u6790\uff0c\u53ef\u4ee5\u767c\u73fe\u5207\u5272\u51fa\u4f86\u7684\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u5728\u76dc\u5237(1)\u8207\u975e\u76dc\u5237(0)\u7684\u8cc7\u6599\u6bd4\u4f8b\u662f\u5dee\u4e0d\u591a\u7684\u3002 \u63a5\u4e0b\u4f86\u91cd\u982d\u6232\u51fa\u5834\u3002\u6211\u5011\u63a1\u7528 LightGBM \u5206\u985e\u5668\uff0c\u82e5\u9084\u6c92\u5b89\u88dd\u7684\u8b80\u8005\u53ef\u4ee5\u53c3\u8003\u4ee5\u4e0b\u6307\u4ee4\u9032\u884c\u5b89\u88dd\u3002 pip install lightgbm \u5b89\u88dd\u7d50\u675f\u5f8c\u5373\u53ef\u8f09\u5165 lightgbm \u5957\u4ef6\u4e26\u9078\u7528 LGBMClassifier \u5206\u985e\u5668\u3002\u53e6\u5916\u6211\u5011\u53ef\u4ee5\u5728\u5efa\u7acb\u5206\u985e\u5668\u540c\u6642\u8a2d\u5b9a\u6a21\u578b\u8d85\u53c3\u6578\uff0c\u9019\u88e1\u6211\u5011\u4f86\u793a\u7bc4\u4f7f\u7528 is_unbalance=True \u8a13\u7df4\u6a21\u578b\u3002\u9664\u6b64\u4e4b\u5916\u6a21\u578b\u7684\u8d85\u53c3\u6578\u6709\u5f88\u591a\uff0c\u53ef\u4ee5\u7531 \u5b98\u65b9 \u6587\u4ef6\u4e2d\u67e5\u95b1\u3002\u4ee5\u4e0b\u5e6b\u5404\u4f4d\u6574\u7406\u5e38\u7528\u7684\u65b9\u6cd5\uff1a Parameters: - num_iterations: \u7e3d\u5171\u8fed\u4ee3\u7684\u6b21\u6578\uff0c\u5373\u6c7a\u7b56\u6a39\u7684\u500b\u6578\u3002\u9810\u8a2d\u503c\u70ba100\u3002 - learning_rate: \u5b78\u7fd2\u901f\u7387\uff0c\u9810\u8a2d0.1\u3002 - boosting: \u9078\u64c7 boosting \u7a2e\u985e\u3002\u5171\u56db\u7a2e gbdt\u3001rf\u3001dart\u3001goss\uff0c\u9810\u8a2d\u70ba gbdt\u3002 - max_depth: \u6a39\u7684\u6700\u5927\u6df1\u5ea6\uff0c\u9810\u8a2d\u503c\u70ba-1\u5373\u8868\u793a\u7121\u9650\u5236\u3002 - min_data_in_leaf: 
\u4e00\u500b\u5b50\u8449\u4e2d\u6700\u5c11\u6578\u64da\uff0c\u53ef\u7528\u65bc\u8655\u7406\u904e\u64ec\u5408\u3002\u9810\u8a2d20\u7b46\u3002 - max_bin: \u5c07\u7279\u5fb5\u503c\u653e\u5165\u6876\u4e2d\u7684\u6700\u5927bins\u6578\u3002\u9810\u8a2d\u70ba255\u3002 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 import lightgbm as lgb model = lgb . LGBMClassifier ( is_unbalance = True ) model . fit ( X_train , y_train ) \u8a13\u7df4\u7d50\u675f\u5f8c\u5373\u53ef\u4f7f\u7528\u525b\u5207\u5272\u597d\u7684\u6e2c\u8a66\u96c6\u9032\u884c\u6a21\u578b\u8a55\u4f30\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u6e96\u78ba\u7387\u9ad8\u9054 94%\u3002 from sklearn.metrics import accuracy_score pred = model . predict ( X_test ) print ( \"Accuracy:\" , accuracy_score ( y_test , pred )) \u8f38\u51fa\u7d50\u679c\uff1a Accuracy: 0.9401706400758401 \u5982\u679c\u8981\u5224\u65b7\u5206\u985e\u5668\u7684\u597d\u58de\uff0c\u50c5\u4f7f\u7528\u6e96\u78ba\u7387\u4f86\u8a55\u4f30\u662f\u4e00\u500b\u4e0d\u597d\u7684\u7fd2\u6163\u3002\u6211\u5011\u61c9\u8a72\u5584\u7528\u6df7\u6dc6\u77e9\u9663\u505a\u66f4\u8fd1\u4e00\u6b65\u7684\u5206\u6790\uff0c\u4e26\u67e5\u770b\u6b63\u6a23\u672c\u8207\u8ca0\u6a23\u672c\u5728\u9810\u6e2c\u4e0a\u7684\u8868\u73fe\u3002\u9996\u5148\u6211\u5011\u5148\u4f86\u5beb\u4e00\u500b\u8a08\u7b97\u6df7\u6dc6\u77e9\u9663\u7684\u51fd\u5f0f\uff0c\u4e26\u7528 seaborn \u7e6a\u88fd\u51fa\u71b1\u529b\u5716\u77e9\u9663\u3002 import seaborn as sns import matplotlib.pyplot as plt def plot_confusion_matrix ( actual_val , pred_val , title = None ): confusion_matrix = pd . 
crosstab ( actual_val , pred_val , rownames = [ 'Actual' ], colnames = [ 'Predicted' ]) plot = sns . heatmap ( confusion_matrix , annot = True , fmt = ',.0f' ) if title is None : pass else : plot . set_title ( title ) plt . show () \u5728\u8a55\u4f30\u6a21\u578b\u4e4b\u524d\u6211\u5011\u5148\u4f86\u67e5\u770b\u6e2c\u8a66\u96c6\u8f38\u51fa y \u7684\u5206\u4f48\u5404\u662f\u591a\u5c11\u3002\u900f\u904e numpy \u7684 unique \u65b9\u6cd5\u53ef\u4ee5\u8a08\u7b97 y_test \u4e2d\u6bcf\u500b\u985e\u5225\u7684\u6578\u91cf\u3002\u5f9e\u8f38\u51fa\u7d50\u679c\u53ef\u4ee5\u5f97\u77e5\uff0c85443 \u7b46\u6e2c\u8a66\u96c6\u4e2d\u5171\u6709 85295 \u7b46\u662f\u6a19\u7c64 0(\u672a\u76dc\u5237)\u3001148 \u7b46\u662f\u6a19\u7c64 1(\u76dc\u5237)\u3002\u77e5\u9053\u9019\u4e9b\u771f\u5be6\u6578\u64da\u7684\u6578\u91cf\u5f8c\uff0c\u63a5\u4e0b\u4f86\u6211\u5011\u5c31\u53ef\u4ee5\u900f\u904e\u6df7\u6dc6\u77e9\u9663\u4f86\u67e5\u770b\u6a21\u578b\u662f\u5426\u6709\u5c07\u9019\u4e9b\u76dc\u5237\u7684\u8cc7\u6599\u88ab\u6b63\u78ba\u9810\u6e2c\u51fa\u4f86\u3002 import numpy as np unique , counts = np . 
unique ( y_test , return_counts = True ) dict ( zip ( unique , counts )) \u8f38\u51fa\u7d50\u679c\uff1a { 0 : 85295 , 1 : 148 } plot_confusion_matrix \u51fd\u5f0f\u5efa\u7acb\u5b8c\u6210\u5f8c\u5373\u53ef\u547c\u53eb\u3002\u6b64\u51fd\u5f0f\u6709\u4e09\u500b\u8f38\u5165\uff0c\u5206\u5225\u70ba y_test \u5be6\u969b\u8f38\u51fa\u7b54\u6848\u3001 pred \u6a21\u578b\u9810\u6e2c\u7d50\u679c\u3001title \u5716\u8868\u6a19\u984c(\u9810\u8a2dNone)\u3002\u76f8\u5c0d\u61c9\u7684\u8b8a\u6578\u8f38\u5165\u5f8c\u5373\u53ef\u5f97\u5230\u8a08\u7b97\u597d\u7684\u6df7\u6dc6\u77e9\u9663\u3002 plot_confusion_matrix ( y_test , pred ) \u4e0b\u5716\u70ba\u5be6\u969b is_unbalance=True \u7684\u8a13\u7df4\u7d50\u679c\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u5728\u6e2c\u8a66\u96c6\u4e2d\u6709 148 \u7b46\u76dc\u5237\u8cc7\u6599\uff0c\u5176\u4e2d\u6709 124 \u7b46\u76dc\u5237\u88ab\u6210\u529f\u8fa8\u8b58\u51fa\u4f86\u3002\u53e6\u5916\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u771f\u5be6\u7b54\u6848\u662f\u6c92\u76dc\u5237\u7684\u8cc7\u6599\u5c45\u7136\u6709 5088 \u7b46\u88ab\u8aa4\u5224\u6210\u76dc\u5237\u3002 \u6211\u5011\u518d\u4f86\u8a66\u8a66\u5c07 is_unbalance \u8a2d\u70ba False \u4e26\u89c0\u5bdf\u6df7\u6dc6\u77e9\u9663\u3002\u53ef\u4ee5\u767c\u73fe\u96d6\u7136\u8aa4\u5224\u7684\u6578\u91cf\u6e1b\u5c11\u4e86\uff0c\u4f46\u662f\u771f\u5be6\u7b54\u6848\u4e2d\u6709 148 \u7b46\u76dc\u5237\u8cc7\u6599\u50c5\u6709 88 \u7b46\u88ab\u6210\u529f\u8fa8\u8b58\u51fa\u4f86\u3002\u6211\u5011\u53ef\u4ee5\u731c\u60f3\u6a21\u578b\u5728\u5927\u591a\u6578\u72c0\u6cc1\u90fd\u6703\u9810\u6e2c\u8cc7\u6599\u672a\u88ab\u76dc\u5237\u7684\u6a5f\u7387\u8f03\u5927\u3002","title":"\u8655\u7406 unbalance \u8cc7\u6599"},{"location":"17.LightGBM/#reference","text":"\u7d42\u65bc\u6709\u4eba\u628aXGBoost \u548c LightGBM \u8b1b\u660e\u767d\u4e86\uff0c\u9805\u76ee\u4e2d\u6700\u4e3b\u6d41\u7684\u96c6\u6210\u6f14\u7b97\u6cd5\uff01 Lightgbm\u57fa\u672c\u539f\u7406\u4ecb\u7d39 
\u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"18.CatBoost/","text":"[Day 18] \u6a5f\u5668\u5b78\u7fd2 boosting \u795e\u5668 - CatBoost \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u4e86\u89e3 CatBoost \u6a21\u578b \u5be6\u4f5c CatBoost \u8ff4\u6b78\u6a21\u578b-\u623f\u50f9\u9810\u6e2c \u6a21\u578b\u8a13\u7df4\u3001\u7279\u5fb5\u7be9\u9078 \u8d85\u53c3\u6578\u641c\u7d22 \u81ea\u52d5\u8655\u7406\u985e\u5225\u578b\u7684\u7279\u5fb5 \u53ef\u89e3\u91cb\u5316\u6a21\u578b \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 CatBoost \u540c\u6a23\u662f\u57fa\u65bc Gradient Boosting Tree \u7684\u68af\u5ea6\u63d0\u5347\u6a39\u6a21\u578b\u6846\u67b6\uff0c\u6700\u5927\u7684\u7279\u9ede\u5c0d\u985e\u5225\u7279\u5fb5\u7684\u76f4\u63a5\u652f\u63f4\uff0c\u751a\u81f3\u5141\u8a31\u5b57\u4e32\u985e\u578b\u7684\u7279\u5fb5\u9032\u884c\u6a21\u578b\u8a13\u7df4\u3002\u8fd1\u5e74\u4f86\u5728 Kaggle \u4e0a\u7684\u6bd4\u8cfd\u9678\u7e8c\u6709\u4eba\u4f7f\u7528 CatBoost \u65b9\u6cd5\u4e26\u53d6\u5f97\u4e0d\u932f\u7684\u6210\u7e3e\uff0c\u65bc\u662f\u5c31\u4f86\u64b0\u5beb\u6587\u7ae0\u9806\u4fbf\u4f86\u77a7\u77a7\u5b83\u8207\u5176\u4ed6 Boosting \u6f14\u7b97\u6cd5\u4e0d\u540c\u4e4b\u8655\u3002\u5176\u4e2d\u6700\u7279\u5225\u7684\u5730\u65b9\u662f CatBoost \u80fd\u5920\u8655\u7406\u975e\u6578\u503c\u578b\u614b\u7684\u8cc7\u6599\uff0c\u4e5f\u5c31\u662f\u8aac\u7121\u9700\u5c0d\u6578\u64da\u7279\u5fb5\u9032\u884c\u4efb\u4f55\u7684\u9810\u8655\u7406\u5c31\u53ef\u4ee5\u5c07\u985e\u5225\u8f49\u63db\u7232\u6578\u5b57\u3002CatBoost \u63a1\u7528\u6c7a\u7b56\u6a39\u68af\u5ea6\u63d0\u5347\u65b9\u6cd5\u4e26\u5ba3\u7a31\u5728\u6548\u80fd\u4e0a\u6bd4 XGBoost \u548c LightGBM \u66f4\u52a0\u512a\u5316\uff0c\u540c\u6642\u652f\u63f4 CPU \u548c GPU \u904b\u7b97\u3002\u8207\u5176\u4ed6 Boosting \u65b9\u6cd5\u76f8\u6bd4 CatBoost 
\u662f\u4e00\u7a2e\u76f8\u5c0d\u8f03\u65b0\u7684\u958b\u6e90\u6a5f\u5668\u5b78\u7fd2\u7b97\u6cd5\u3002\u8a72\u6f14\u7b97\u6cd5\u662f\u7531\u4e00\u9593\u4fc4\u7f85\u65af\u7684\u516c\u53f8 Yandex \u65bc 2017 \u5e74\u6240\u63d0\u51fa\uff0c\u540c\u6642\u5728 arxiv \u6709\u4e00\u7bc7 CatBoost: unbiased boosting with categorical features \u7684\u8ad6\u6587\uff0c\u6587\u4e2d\u4f5c\u8005\u8a73\u7d30\u8aaa\u660e\u4e86 CatBoost \u7684\u65b9\u6cd5\u8207\u512a\u9ede\u3002 CatBoost \u512a\u9ede CatBoost \u540d\u7a31\u6e90\u65bc Category \u548c Boost \u5169\u500b\u55ae\u8a5e\uff0c\u627f\u8972 Boosting \u7684\u512a\u9ede\u4e4b\u5916\u8a72\u6f14\u7b97\u6cd5\u5728\u985e\u5225\u578b\u7684\u7279\u5fb5\u4e0a\u505a\u4e86\u4e00\u4e9b\u66f4\u516c\u5e73\u7684\u7279\u5fb5\u5de5\u7a0b\u3002\u8a13\u7df4\u904e\u7a0b\u4e2d\u5141\u8a31\u6c92\u6709\u7de8\u78bc\u7684\u985e\u5225\u7279\u5fb5\uff0c\u900f\u904e\u5206\u985e\u548c\u6578\u5b57\u7279\u5fb5\u7d44\u5408\u7684\u5404\u7a2e\u7d71\u8a08\u91cf\u70ba\u985e\u5225\u578b\u7684\u7279\u5fb5\u505a\u7de8\u78bc\u3002\u4e0d\u904e\u5728\u8a13\u7df4\u524d\u5fc5\u9808\u78ba\u4fdd\u8a72\u7279\u5fb5\u4e2d\u7121\u7f3a\u5931\u503c\u3002\u5176\u8a13\u7df4\u8cc7\u6599\u82e5\u6709\u7f3a\u5931\u503c CatBoost \u9810\u8a2d\u6703\u5c07\u6578\u503c\u578b\u7684\u8cc7\u6599\u88dc\u4e0a\u6700\u5c0f\u503c\uff0c\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5 \u53c3\u8003 \u3002\u53e6\u5916\u5c0d\u65bc GPU \u7684\u4f7f\u7528\u8005\uff0c\u5b83\u4e5f\u80fd\u8655\u7406\u5b57\u4e32(\u985e\u5225)\u578b\u614b\u7684\u7279\u5fb5\u3002 \u81ea\u52d5\u8655\u7406\u985e\u5225\u578b\u7684\u7279\u5fb5 \u81ea\u52d5\u8655\u7406\u7f3a\u5931\u503c \u53ef\u4ee5\u8655\u7406\u5404\u7a2e\u6578\u64da\u985e\u578b\uff0c\u5982\u97f3\u983b\u3001\u6587\u5b57\u3001\u5716\u50cf \u6e1b\u5c11\u4eba\u5de5\u8abf\u53c3\u7684\u9700\u8981\uff0c\u4e26\u964d\u4f4e\u4e86\u904e\u64ec\u5408\u7684\u6a5f\u6703 CatBoost \u5b89\u88dd CatBoost \u6f14\u7b97\u6cd5\u53ef\u4ee5\u89e3\u6c7a\u5206\u985e (CatBoostClassifier) 
\u548c\u8ff4\u6b78 (CatBoostRegressor) \u7684\u554f\u984c\u3002\u5b89\u88dd\u7684\u65b9\u5f0f\u4e5f\u975e\u5e38\u7c21\u55ae\uff0c\u4f7f\u7528 pip \u5c31\u80fd\u8f15\u9b06\u5b89\u88dd\u3002 pip install catboost CatBoost Parameters CatBoost \u57fa\u672c\u4e0a\u53ef\u4ee5\u81ea\u7531\u7684\u8b93\u6f14\u7b97\u6cd5\u53bb\u9078\u64c7\u6700\u4f73\u7684\u6a21\u578b\uff0c\u4e0d\u904e API \u9084\u662f\u63d0\u4f9b\u4e00\u4e9b\u57fa\u672c\u7684\u8d85\u53c3\u6578\u8b93\u4f7f\u7528\u8005\u624b\u52d5\u8abf\u6574\u3002 Parameters: - iterations: \u7e3d\u5171\u8fed\u4ee3\u7684\u6b21\u6578\uff0c\u5373\u6c7a\u7b56\u6a39\u7684\u500b\u6578\u3002\u9810\u8a2d\u503c\u70ba 1000\u3002 - use_best_model: \u8a2d\u5b9a True \u6642\u5fc5\u9808\u7d66\u5b9a\u9a57\u8b49\u96c6\uff0c\u5c07\u6703\u7559\u4e0b\u9a57\u8b49\u96c6\u4e2d\u5206\u6578\u6700\u9ad8\u7684\u6a21\u578b\u3002 - early_stopping_rounds: \u9023\u7e8c\u8a13\u7df4N\u4ee3\uff0c\u82e5\u7d50\u679c\u672a\u6539\u5584\u5247\u63d0\u65e9\u505c\u6b62\u8a13\u7df4\u3002 - od_type: IncToDec/Iter\uff0c\u9810\u8a2d Iter \u9632\u6b62 Overfitting \u8a55\u4f30\u65b9\u5f0f\uff0c\u82e5\u8a2d\u5b9a\u524d\u8005\u9700\u8981\u8a2d\u5b9a\u95a5\u503c\u3002 - eval_metric: \u6a21\u578b\u8a55\u4f30\u65b9\u5f0f\u3002 - loss_function: \u8a08\u7b97loss\u65b9\u6cd5\u3002 - verbose: True(1)/False(0)\uff0c\u9810\u8a2d1\u986f\u793a\u8a13\u7df4\u904e\u7a0b\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\u3002 - learning_rate: \u9810\u8a2d automatically\u3002 - depth: \u6a39\u7684\u6df1\u5ea6\uff0c\u9810\u8a2d 6\u3002 - cat_features: \u8f38\u5165\u985e\u5225\u7279\u5fb5\u7684\u7d22\u5f15\uff0c\u5b83\u6703\u81ea\u52d5\u5e6b\u4f60\u8655\u7406\u3002 \u53c3\u8003 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: 
\u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 \u5982\u679c\u9700\u8981\u624b\u52d5\u8655\u7406 Overfitting \u554f\u984c\u53ef\u4ee5\u53c3\u8003\u9019\u4efd\u5b98\u65b9 \u6587\u4ef6 \u6a21\u578b\u8a13\u7df4 \u6a21\u578b\u8a13\u7df4\u65b9\u5f0f\u57fa\u672c\u4e0a\u8207 XGBoost \u4e00\u6a23\uff0c\u5982\u679c\u4f60\u719f\u6089 sklearn \u7684\u8a71 CatBoost \u7684\u4f7f\u7528\u65b9\u5f0f\u57fa\u672c\u4e0a\u5927\u540c\u5c0f\u7570\u3002\u53ea\u4e0d\u904e\u5728 CatBoost \u4e2d\u591a\u4e86\u4e00\u4e9b\u65b9\u4fbf\u7684\u65b9\u6cd5\u548c\u53c3\u6578\u53ef\u4ee5\u4f7f\u7528\u3002\u50cf\u662f\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u53ef\u4ee5\u52a0\u4e0a plot=True \uff0c\u4e26\u5728 eval_set \u53c3\u6578\u4e2d\u63d2\u5165\u6e2c\u8a66\u96c6\u53ef\u4ee5\u5373\u6642\u770b\u5230\u8a13\u7df4\u904e\u7a0b\u7684\u8996\u89ba\u5316\u5206\u6790\u3002\u751a\u81f3\u53ef\u4ee5\u4f7f\u7528\u4ea4\u53c9\u9a57\u8b49\uff0c\u5728\u4e0d\u540c\u7684\u5206\u5272\u4e0a\u89c0\u5bdf\u6a21\u578b\u6e96\u78ba\u5ea6\u7684\u5e73\u5747\u548c\u6a19\u6e96\u504f\u5dee\u3002 from catboost import CatBoostRegressor # \u5efa\u7acb\u6a21\u578b model = CatBoostRegressor ( random_state = 42 , loss_function = 'RMSE' , eval_metric = 'RMSE' , use_best_model = True ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b model . fit ( X_train , y_train , eval_set = ( X_test , y_test ), verbose = 0 , plot = True ) \u7279\u5fb5\u7be9\u9078 \u8a13\u7df4\u904e\u7a0b\u4e2d\u6703\u81ea\u52d5\u5f9e\u8cc7\u6599\u4e2d\u7be9\u9078\u5c0d\u6a21\u578b\u9810\u6e2c\u6709\u7528\u7684\u7279\u5fb5\uff0c\u4e26\u79fb\u9664\u7121\u5e6b\u52a9\u9810\u6e2c\u7684\u7279\u5fb5\u3002 \u53c3\u8003 from catboost import CatBoostRegressor , Pool , EShapCalcType , EFeaturesSelectionAlgorithm # feature_names = ['F{}'.format(i) for i in range(X_train.shape[1])] train_pool = Pool ( X_train , y_train , feature_names = boston_dataset . feature_names . 
tolist ()) test_pool = Pool ( X_test , y_test , feature_names = boston_dataset . feature_names . tolist ()) model = CatBoostRegressor ( random_state = 42 , loss_function = 'RMSE' , eval_metric = 'RMSE' , use_best_model = True ) summary = model . select_features ( train_pool , eval_set = test_pool , features_for_select = '0-12' , num_features_to_select = 3 , steps = 2 , algorithm = EFeaturesSelectionAlgorithm . RecursiveByShapValues , shap_calc_type = EShapCalcType . Regular , train_final_model = True , logging_level = 'Silent' , plot = False ) summary \u7531\u65bc\u5728\u8a13\u7df4\u5c07 num_features_to_select \u8a2d\u70ba\u4e09\uff0c\u5373\u8868\u793a\u6a21\u578b\u8a13\u7df4\u6642\u6703\u62ff\u53d6\u4e09\u500b\u6700\u91cd\u8981\u7279\u5fb5\u7576\u4f5c\u505a\u4e2d\u6a21\u578b\u9810\u6e2c\u65b9\u5f0f\u3002\u6211\u5011\u63a1\u7528 sklearn \u7684\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6\uff0c\u7d50\u679c\u53ef\u4ee5\u767c\u73fe\u4e09\u500b\u6700\u91cd\u8981\u7279\u5fb5\u70ba ['RM', 'PTRATIO', 'LSTAT']\u3002\u5982\u679c\u4f60\u6709\u505a EDA \u53ef\u4ee5\u767c\u73fe\u9019\u4e09\u500b\u7279\u5fb5\u8207\u623f\u50f9\u7684\u95dc\u806f\u6027\u90fd\u5f88\u9ad8\u3002 {'selected_features': [5, 10, 12], 'eliminated_features_names': ['DIS', 'B', 'ZN', 'CHAS', 'RAD', 'INDUS', 'CRIM', 'AGE', 'TAX', 'NOX'], 'eliminated_features': [7, 11, 1, 3, 8, 2, 0, 6, 9, 4], 'selected_features_names': ['RM', 'PTRATIO', 'LSTAT']} Grid search \u9664\u6b64\u4e4b\u5916 CatBoost \u63d0\u4f9b\u5c0d\u6a21\u578b\u7684\u6307\u5b9a\u53c3\u6578\u503c\u9032\u884c\u7c21\u55ae\u7684\u7db2\u683c\u641c\u7d22\uff0c\u5982\u679c\u6709\u4f7f\u7528\u904e sklearn \u7684 Grid Search \u5176\u5be6\u4ed6\u5c31\u662f\u4e00\u6a23\u7684\u4f7f\u7528\u65b9\u5f0f\u3002 \u53c3\u8003 from catboost import CatBoostRegressor grid = { 'iterations' : [ 100 , 150 , 200 ], 'learning_rate' : [ 0.03 , 0.1 ], 'depth' : [ 2 , 4 , 6 , 8 ], 'l2_leaf_reg' : [ 0.2 , 0.5 , 1 , 3 ]} model = CatBoostRegressor ( random_state = 42 , loss_function 
= 'RMSE' , eval_metric = 'RMSE' ) model . grid_search ( grid , X_train , y_train ) \u81ea\u52d5\u8655\u7406\u985e\u5225\u578b\u7684\u7279\u5fb5 CatBoost \u7121\u9700\u5c0d\u6578\u64da\u7279\u5fb5\u9032\u884c\u4efb\u4f55\u7684\u9810\u8655\u7406\u5c31\u53ef\u4ee5\u5c07\u985e\u5225\u8f49\u63db\u7232\u6578\u5b57\u3002\u4e0b\u9762\u7a0b\u5f0f\u70ba\u4e00\u500b\u5206\u985e\u554f\u984c\u7684\u7bc4\u4f8b\uff0c\u5176\u4e2d\u8f38\u5165\u7279\u5fb5\u7684\u7b2c\u4e00\u500b\u70ba\u5b63\u7bc0\u3002\u5728\u6a5f\u5668\u5b78\u7fd2\u4e0a\u7684\u8a8d\u77e5\u6211\u5011\u5fc5\u9808\u5c07\u6240\u4ee5\u5b57\u4e32\u578b\u8cc7\u6599\u5fc5\u9808\u900f\u904e\u6a19\u7c64\u7de8\u78bc\u65b9\u5f0f\u8f49\u63db\u6210\u6578\u503c\uff0c\u7136\u800c\u5728 CatBoost \u5b8c\u5168\u4e0d\u9700\u8981\u3002\u50c5\u9700\u5728\u8a13\u7df4\u6a21\u578b\u6642\u7d66\u4e88\u53c3\u6578 cat_features = [0] \u5373\u4ee3\u8868\u8cc7\u6599\u7684\u7b2c\u4e00\u500b\u7279\u5fb5\u9700\u8981\u9032\u884c\u985e\u5225\u8f49\u63db\u3002\u53e6\u5916\u8f38\u51fa\u8449\u4e0d\u4e00\u5b9a\u8981\u7de8\u78bc\u5f8c\u7684\u7d50\u679c\uff0c\u4f60\u4e5f\u53ef\u4ee5\u4e1f\u5165\u6587\u5b57\u9032\u884c\u8a13\u7df4\u53ea\u8981\u52a0\u4e0a loss_function='MultiClass' \u5373\u53ef\u3002 from catboost import Pool , CatBoostClassifier train_data = [[ \"summer\" , 1924 , 44 ], [ \"summer\" , 1932 , 37 ], [ \"winter\" , 1980 , 37 ], [ \"summer\" , 2012 , 204 ]] eval_data = [[ \"winter\" , 1996 , 197 ], [ \"winter\" , 1968 , 37 ], [ \"summer\" , 2002 , 77 ], [ \"summer\" , 1948 , 59 ]] train_label = [ \"France\" , \"USA\" , \"USA\" , \"UK\" ] eval_label = [ \"USA\" , \"France\" , \"USA\" , \"UK\" ] # Initialize CatBoostClassifier model = CatBoostClassifier ( iterations = 10 , learning_rate = 1 , depth = 2 , cat_features = [ 0 ], loss_function = 'MultiClass' ) # Fit model model . fit ( train_data , train_label ) # Get predicted classes preds_class = model . predict ( eval_data ) # Get predicted probabilities for each class preds_proba = model . 
predict_proba ( eval_data ) # Get predicted RawFormulaVal preds_raw = model . predict ( eval_data , prediction_type = 'RawFormulaVal' ) \u53c3\u8003 \u5584\u7528 Verbose \u8a13\u7df4\u904e\u7a0b\u4e2d\u53ef\u4ee5\u96a8\u6642\u89c0\u5bdf\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684loss\uff0c\u4f7f\u7528verbose=10\u5373\u4ee3\u8868\u6bcf10\u6b21\u8fed\u4ee3\u6703\u986f\u793a\u4e00\u6b21\u8cc7\u8a0a\uff0c\u9019\u7a2e\u65b9\u5f0f\u4e5f\u89e3\u6c7a\u6bcf\u6b21\u758a\u4ee3\u986f\u793a\u4e00\u6b21\u7684\u56f0\u64fe\u3002\u8a13\u7df4\u904e\u7a0b\u4e2d\u5269\u9918\u6642\u9593\u4e5f\u6703\u986f\u793a\u51fa\u4f86\u3002 \u6a21\u578b\u7684\u89e3\u91cb CatBoost \u63d0\u4f9b\u4e86 plot \u53ef\u4ee5\u65b9\u4fbf\u5728\u8a13\u7df4\u6642\u67e5\u770b\u4e26\u4f5c\u5373\u6642\u5206\u6790\u8a13\u7df4\u8da8\u52e2\u3002\u9664\u6b64\u4e4b\u5916 CatBoost \u4e5f\u652f\u63f4 SHAP \u589e\u52a0\u4e86\u6a21\u578b\u53ef\u89e3\u91cb\u3002\u8a73\u7d30\u7684\u4f7f\u7528\u65b9\u5f0f\u53ef\u4ee5\u53c3\u8003\u5b98\u65b9 \u6559\u5b78 \u3002 \u5c0f\u7d50 CatBoost \u7684\u512a\u9ede\u548c\u4f7f\u7528\u65b9\u6cd5\u90fd\u4ecb\u7d39\u5b8c\u4e86\uff0c\u662f\u4e0d\u662f\u89ba\u5f97\u5341\u5206\u7c21\u55ae\u6613\u7528\u4e14\u529f\u80fd\u5f37\u5927\u3002\u5c24\u5176\u662f\u9047\u5230\u8cc7\u6599\u9700\u8981\u5927\u91cf\u8655\u7406\u548c\u7279\u5fb5\u6578\u503c\u5316\u7684\u4efb\u52d9\u6642\u6700\u9069\u5408\u4f7f\u7528 CatBoost \u4e86\u3002\u771f\u7684\u662f\u6240\u8b02\u7684\u61f6\u4eba\u5957\u4ef6\uff0c\u540d\u7b26\u5176\u5be6\u7684 Ying Train Yi Fa(\u786cTrain\u4e00\u767c)! 
Reference Tutorial: CatBoost Overview SHAP Catboost tutorial CatBoost regression in 6 minutes Catboost\uff1a\u8d85\u8d8aLightgbm\u548cXGBoost\u7684\u53c8\u4e00\u500bboost\u7b97\u6cd5\u795e\u5668 CatBoost\u3001LightGBM\u3001XGBoost\uff0c\u9019\u4e9b\u7b97\u6cd5\u4f60\u90fd\u77ad\u89e3\u55ce\uff1f \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 18] \u6a5f\u5668\u5b78\u7fd2 boosting \u795e\u5668 - CatBoost"},{"location":"18.CatBoost/#day-18-boosting-catboost","text":"","title":"[Day 18] \u6a5f\u5668\u5b78\u7fd2 boosting \u795e\u5668 - CatBoost"},{"location":"18.CatBoost/#_1","text":"\u4e86\u89e3 CatBoost \u6a21\u578b \u5be6\u4f5c CatBoost \u8ff4\u6b78\u6a21\u578b-\u623f\u50f9\u9810\u6e2c \u6a21\u578b\u8a13\u7df4\u3001\u7279\u5fb5\u7be9\u9078 \u8d85\u53c3\u6578\u641c\u7d22 \u81ea\u52d5\u8655\u7406\u985e\u5225\u578b\u7684\u7279\u5fb5 \u53ef\u89e3\u91cb\u5316\u6a21\u578b \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"18.CatBoost/#_2","text":"CatBoost \u540c\u6a23\u662f\u57fa\u65bc Gradient Boosting Tree \u7684\u68af\u5ea6\u63d0\u5347\u6a39\u6a21\u578b\u6846\u67b6\uff0c\u6700\u5927\u7684\u7279\u9ede\u5c0d\u985e\u5225\u7279\u5fb5\u7684\u76f4\u63a5\u652f\u63f4\uff0c\u751a\u81f3\u5141\u8a31\u5b57\u4e32\u985e\u578b\u7684\u7279\u5fb5\u9032\u884c\u6a21\u578b\u8a13\u7df4\u3002\u8fd1\u5e74\u4f86\u5728 Kaggle \u4e0a\u7684\u6bd4\u8cfd\u9678\u7e8c\u6709\u4eba\u4f7f\u7528 CatBoost \u65b9\u6cd5\u4e26\u53d6\u5f97\u4e0d\u932f\u7684\u6210\u7e3e\uff0c\u65bc\u662f\u5c31\u4f86\u64b0\u5beb\u6587\u7ae0\u9806\u4fbf\u4f86\u77a7\u77a7\u5b83\u8207\u5176\u4ed6 Boosting \u6f14\u7b97\u6cd5\u4e0d\u540c\u4e4b\u8655\u3002\u5176\u4e2d\u6700\u7279\u5225\u7684\u5730\u65b9\u662f CatBoost 
\u80fd\u5920\u8655\u7406\u975e\u6578\u503c\u578b\u614b\u7684\u8cc7\u6599\uff0c\u4e5f\u5c31\u662f\u8aac\u7121\u9700\u5c0d\u6578\u64da\u7279\u5fb5\u9032\u884c\u4efb\u4f55\u7684\u9810\u8655\u7406\u5c31\u53ef\u4ee5\u5c07\u985e\u5225\u8f49\u63db\u7232\u6578\u5b57\u3002CatBoost \u63a1\u7528\u6c7a\u7b56\u6a39\u68af\u5ea6\u63d0\u5347\u65b9\u6cd5\u4e26\u5ba3\u7a31\u5728\u6548\u80fd\u4e0a\u6bd4 XGBoost \u548c LightGBM \u66f4\u52a0\u512a\u5316\uff0c\u540c\u6642\u652f\u63f4 CPU \u548c GPU \u904b\u7b97\u3002\u8207\u5176\u4ed6 Boosting \u65b9\u6cd5\u76f8\u6bd4 CatBoost \u662f\u4e00\u7a2e\u76f8\u5c0d\u8f03\u65b0\u7684\u958b\u6e90\u6a5f\u5668\u5b78\u7fd2\u7b97\u6cd5\u3002\u8a72\u6f14\u7b97\u6cd5\u662f\u7531\u4e00\u9593\u4fc4\u7f85\u65af\u7684\u516c\u53f8 Yandex \u65bc 2017 \u5e74\u6240\u63d0\u51fa\uff0c\u540c\u6642\u5728 arxiv \u6709\u4e00\u7bc7 CatBoost: unbiased boosting with categorical features \u7684\u8ad6\u6587\uff0c\u6587\u4e2d\u4f5c\u8005\u8a73\u7d30\u8aaa\u660e\u4e86 CatBoost \u7684\u65b9\u6cd5\u8207\u512a\u9ede\u3002","title":"\u524d\u8a00"},{"location":"18.CatBoost/#catboost","text":"CatBoost \u540d\u7a31\u6e90\u65bc Category \u548c Boost \u5169\u500b\u55ae\u8a5e\uff0c\u627f\u8972 Boosting \u7684\u512a\u9ede\u4e4b\u5916\u8a72\u6f14\u7b97\u6cd5\u5728\u985e\u5225\u578b\u7684\u7279\u5fb5\u4e0a\u505a\u4e86\u4e00\u4e9b\u66f4\u516c\u5e73\u7684\u7279\u5fb5\u5de5\u7a0b\u3002\u8a13\u7df4\u904e\u7a0b\u4e2d\u5141\u8a31\u6c92\u6709\u7de8\u78bc\u7684\u985e\u5225\u7279\u5fb5\uff0c\u900f\u904e\u5206\u985e\u548c\u6578\u5b57\u7279\u5fb5\u7d44\u5408\u7684\u5404\u7a2e\u7d71\u8a08\u91cf\u70ba\u985e\u5225\u578b\u7684\u7279\u5fb5\u505a\u7de8\u78bc\u3002\u4e0d\u904e\u5728\u8a13\u7df4\u524d\u5fc5\u9808\u78ba\u4fdd\u8a72\u7279\u5fb5\u4e2d\u7121\u7f3a\u5931\u503c\u3002\u5176\u8a13\u7df4\u8cc7\u6599\u82e5\u6709\u7f3a\u5931\u503c CatBoost \u9810\u8a2d\u6703\u5c07\u6578\u503c\u578b\u7684\u8cc7\u6599\u88dc\u4e0a\u6700\u5c0f\u503c\uff0c\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5 \u53c3\u8003 
\u3002\u53e6\u5916\u5c0d\u65bc GPU \u7684\u4f7f\u7528\u8005\uff0c\u5b83\u4e5f\u80fd\u8655\u7406\u5b57\u4e32(\u985e\u5225)\u578b\u614b\u7684\u7279\u5fb5\u3002 \u81ea\u52d5\u8655\u7406\u985e\u5225\u578b\u7684\u7279\u5fb5 \u81ea\u52d5\u8655\u7406\u7f3a\u5931\u503c \u53ef\u4ee5\u8655\u7406\u5404\u7a2e\u6578\u64da\u985e\u578b\uff0c\u5982\u97f3\u983b\u3001\u6587\u5b57\u3001\u5716\u50cf \u6e1b\u5c11\u4eba\u5de5\u8abf\u53c3\u7684\u9700\u8981\uff0c\u4e26\u964d\u4f4e\u4e86\u904e\u64ec\u5408\u7684\u6a5f\u6703","title":"CatBoost \u512a\u9ede"},{"location":"18.CatBoost/#catboost_1","text":"CatBoost \u6f14\u7b97\u6cd5\u53ef\u4ee5\u89e3\u6c7a\u5206\u985e (CatBoostClassifier) \u548c\u8ff4\u6b78 (CatBoostRegressor) \u7684\u554f\u984c\u3002\u5b89\u88dd\u7684\u65b9\u5f0f\u4e5f\u975e\u5e38\u7c21\u55ae\uff0c\u4f7f\u7528 pip \u5c31\u80fd\u8f15\u9b06\u5b89\u88dd\u3002 pip install catboost","title":"CatBoost \u5b89\u88dd"},{"location":"18.CatBoost/#catboost-parameters","text":"CatBoost \u57fa\u672c\u4e0a\u53ef\u4ee5\u81ea\u7531\u7684\u8b93\u6f14\u7b97\u6cd5\u53bb\u9078\u64c7\u6700\u4f73\u7684\u6a21\u578b\uff0c\u4e0d\u904e API \u9084\u662f\u63d0\u4f9b\u4e00\u4e9b\u57fa\u672c\u7684\u8d85\u53c3\u6578\u8b93\u4f7f\u7528\u8005\u624b\u52d5\u8abf\u6574\u3002 Parameters: - iterations: \u7e3d\u5171\u8fed\u4ee3\u7684\u6b21\u6578\uff0c\u5373\u6c7a\u7b56\u6a39\u7684\u500b\u6578\u3002\u9810\u8a2d\u503c\u70ba 1000\u3002 - use_best_model: \u8a2d\u5b9a True \u6642\u5fc5\u9808\u7d66\u5b9a\u9a57\u8b49\u96c6\uff0c\u5c07\u6703\u7559\u4e0b\u9a57\u8b49\u96c6\u5206\u4e2d\u6578\u6700\u9ad8\u7684\u6a21\u578b\u3002 - early_stopping_rounds: \u9023\u7e8c\u8a13\u7df4N\u4ee3\uff0c\u82e5\u7d50\u679c\u672a\u6539\u5584\u5247\u63d0\u65e9\u505c\u6b62\u8a13\u7df4\u3002 - od_type: IncToDec/Iter\uff0c\u9810\u8a2d Iter \u9632\u6b62 Overfitting \u8a55\u4f30\u65b9\u5f0f\uff0c\u82e5\u8a2d\u5b9a\u524d\u8005\u9700\u8981\u8a2d\u5b9a\u95a5\u503c\u3002 - eval_metric: \u6a21\u578b\u8a55\u4f30\u65b9\u5f0f\u3002 - loss_function: 
\u8a08\u7b97loss\u65b9\u6cd5\u3002 - verbose: True(1)/Flase(0)\uff0c\u9810\u8a2d1\u986f\u793a\u8a13\u7df4\u904e\u7a0b\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u78ba\u4fdd\u6bcf\u6b21\u8a13\u7df4\u7d50\u679c\u90fd\u4e00\u6a23\u3002 - learning_rate: \u9810\u8a2d automatically\u3002 - depth: \u6a39\u7684\u6df1\u5ea6\uff0c\u9810\u8a2d 6\u3002 - cat_features: \u8f38\u5165\u985e\u5225\u7279\u5fb5\u7684\u7d22\u5f15\uff0c\u5b83\u6703\u81ea\u52d5\u5e6b\u4f60\u8655\u7406\u3002 \u53c3\u8003 Attributes: - feature_importances_: \u67e5\u8a62\u6a21\u578b\u7279\u5fb5\u7684\u91cd\u8981\u7a0b\u5ea6\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 \u5982\u679c\u9700\u8981\u624b\u52d5\u8655\u7406 Overfitting \u554f\u984c\u53ef\u4ee5\u53c3\u8003\u9019\u4efd\u5b98\u65b9 \u6587\u4ef6","title":"CatBoost Parameters"},{"location":"18.CatBoost/#_3","text":"\u6a21\u578b\u8a13\u7df4\u65b9\u5f0f\u57fa\u672c\u4e0a\u8207 XGBoost \u4e00\u6a23\uff0c\u5982\u679c\u4f60\u719f\u6089 sklearn \u7684\u8a71 CatBoost \u7684\u4f7f\u7528\u65b9\u5f0f\u57fa\u672c\u4e0a\u5927\u540c\u5c0f\u7570\u3002\u53ea\u4e0d\u904e\u5728 CatBoost \u4e2d\u591a\u4e86\u4e00\u4e9b\u65b9\u4fbf\u7684\u65b9\u6cd5\u548c\u53c3\u6578\u53ef\u4ee5\u4f7f\u7528\u3002\u50cf\u662f\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u53ef\u4ee5\u52a0\u4e0a plot=True \uff0c\u4e26\u5728 eval_set \u53c3\u6578\u4e2d\u63d2\u5165\u6e2c\u8a66\u96c6\u53ef\u4ee5\u5373\u6642\u770b\u5230\u8a13\u7df4\u904e\u7a0b\u7684\u8996\u89ba\u5316\u5206\u6790\u3002\u751a\u81f3\u53ef\u4ee5\u4f7f\u7528\u4ea4\u53c9\u9a57\u8b49\uff0c\u5728\u4e0d\u540c\u7684\u5206\u5272\u4e0a\u89c0\u5bdf\u6a21\u578b\u6e96\u78ba\u5ea6\u7684\u5e73\u5747\u548c\u6a19\u6e96\u504f\u5dee\u3002 from catboost import CatBoostRegressor # \u5efa\u7acb\u6a21\u578b model = CatBoostRegressor ( random_state = 42 , loss_function = 'RMSE' , 
eval_metric = 'RMSE' , use_best_model = True ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b model . fit ( X_train , y_train , eval_set = ( X_test , y_test ), verbose = 0 , plot = True )","title":"\u6a21\u578b\u8a13\u7df4"},{"location":"18.CatBoost/#_4","text":"\u8a13\u7df4\u904e\u7a0b\u4e2d\u6703\u81ea\u52d5\u5f9e\u8cc7\u6599\u4e2d\u7be9\u9078\u5c0d\u6a21\u578b\u9810\u6e2c\u6709\u7528\u7684\u7279\u5fb5\uff0c\u4e26\u79fb\u9664\u7121\u5e6b\u52a9\u9810\u6e2c\u7684\u7279\u5fb5\u3002 \u53c3\u8003 from catboost import CatBoostRegressor , Pool , EShapCalcType , EFeaturesSelectionAlgorithm # feature_names = ['F{}'.format(i) for i in range(X_train.shape[1])] train_pool = Pool ( X_train , y_train , feature_names = boston_dataset . feature_names . tolist ()) test_pool = Pool ( X_test , y_test , feature_names = boston_dataset . feature_names . tolist ()) model = CatBoostRegressor ( random_state = 42 , loss_function = 'RMSE' , eval_metric = 'RMSE' , use_best_model = True ) summary = model . select_features ( train_pool , eval_set = test_pool , features_for_select = '0-12' , num_features_to_select = 3 , steps = 2 , algorithm = EFeaturesSelectionAlgorithm . RecursiveByShapValues , shap_calc_type = EShapCalcType . 
Regular , train_final_model = True , logging_level = 'Silent' , plot = False ) summary \u7531\u65bc\u5728\u8a13\u7df4\u5c07 num_features_to_select \u8a2d\u70ba\u4e09\uff0c\u5373\u8868\u793a\u6a21\u578b\u8a13\u7df4\u6642\u6703\u62ff\u53d6\u4e09\u500b\u6700\u91cd\u8981\u7279\u5fb5\u7576\u4f5c\u505a\u4e2d\u6a21\u578b\u9810\u6e2c\u65b9\u5f0f\u3002\u6211\u5011\u63a1\u7528 sklearn \u7684\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6\uff0c\u7d50\u679c\u53ef\u4ee5\u767c\u73fe\u4e09\u500b\u6700\u91cd\u8981\u7279\u5fb5\u70ba ['RM', 'PTRATIO', 'LSTAT']\u3002\u5982\u679c\u4f60\u6709\u505a EDA \u53ef\u4ee5\u767c\u73fe\u9019\u4e09\u500b\u7279\u5fb5\u8207\u623f\u50f9\u7684\u95dc\u806f\u6027\u90fd\u5f88\u9ad8\u3002 {'selected_features': [5, 10, 12], 'eliminated_features_names': ['DIS', 'B', 'ZN', 'CHAS', 'RAD', 'INDUS', 'CRIM', 'AGE', 'TAX', 'NOX'], 'eliminated_features': [7, 11, 1, 3, 8, 2, 0, 6, 9, 4], 'selected_features_names': ['RM', 'PTRATIO', 'LSTAT']}","title":"\u7279\u5fb5\u7be9\u9078"},{"location":"18.CatBoost/#grid-search","text":"\u9664\u6b64\u4e4b\u5916 CatBoost \u63d0\u4f9b\u5c0d\u6a21\u578b\u7684\u6307\u5b9a\u53c3\u6578\u503c\u9032\u884c\u7c21\u55ae\u7684\u7db2\u683c\u641c\u7d22\uff0c\u5982\u679c\u6709\u4f7f\u7528\u904e sklearn \u7684 Grid Search \u5176\u5be6\u4ed6\u5c31\u662f\u4e00\u6a23\u7684\u4f7f\u7528\u65b9\u5f0f\u3002 \u53c3\u8003 from catboost import CatBoostRegressor grid = { 'iterations' : [ 100 , 150 , 200 ], 'learning_rate' : [ 0.03 , 0.1 ], 'depth' : [ 2 , 4 , 6 , 8 ], 'l2_leaf_reg' : [ 0.2 , 0.5 , 1 , 3 ]} model = CatBoostRegressor ( random_state = 42 , loss_function = 'RMSE' , eval_metric = 'RMSE' ) model . 
grid_search ( grid , X_train , y_train )","title":"Grid search"},{"location":"18.CatBoost/#_5","text":"CatBoost \u7121\u9700\u5c0d\u6578\u64da\u7279\u5fb5\u9032\u884c\u4efb\u4f55\u7684\u9810\u8655\u7406\u5c31\u53ef\u4ee5\u5c07\u985e\u5225\u8f49\u63db\u7232\u6578\u5b57\u3002\u4e0b\u9762\u7a0b\u5f0f\u70ba\u4e00\u500b\u5206\u985e\u554f\u984c\u7684\u7bc4\u4f8b\uff0c\u5176\u4e2d\u8f38\u5165\u7279\u5fb5\u7684\u7b2c\u4e00\u500b\u70ba\u5b63\u7bc0\u3002\u5728\u6a5f\u5668\u5b78\u7fd2\u4e0a\u7684\u8a8d\u77e5\u6211\u5011\u5fc5\u9808\u5c07\u6240\u4ee5\u5b57\u4e32\u578b\u8cc7\u6599\u5fc5\u9808\u900f\u904e\u6a19\u7c64\u7de8\u78bc\u65b9\u5f0f\u8f49\u63db\u6210\u6578\u503c\uff0c\u7136\u800c\u5728 CatBoost \u5b8c\u5168\u4e0d\u9700\u8981\u3002\u50c5\u9700\u5728\u8a13\u7df4\u6a21\u578b\u6642\u7d66\u4e88\u53c3\u6578 cat_features = [0] \u5373\u4ee3\u8868\u8cc7\u6599\u7684\u7b2c\u4e00\u500b\u7279\u5fb5\u9700\u8981\u9032\u884c\u985e\u5225\u8f49\u63db\u3002\u53e6\u5916\u8f38\u51fa\u8449\u4e0d\u4e00\u5b9a\u8981\u7de8\u78bc\u5f8c\u7684\u7d50\u679c\uff0c\u4f60\u4e5f\u53ef\u4ee5\u4e1f\u5165\u6587\u5b57\u9032\u884c\u8a13\u7df4\u53ea\u8981\u52a0\u4e0a loss_function='MultiClass' \u5373\u53ef\u3002 from catboost import Pool , CatBoostClassifier train_data = [[ \"summer\" , 1924 , 44 ], [ \"summer\" , 1932 , 37 ], [ \"winter\" , 1980 , 37 ], [ \"summer\" , 2012 , 204 ]] eval_data = [[ \"winter\" , 1996 , 197 ], [ \"winter\" , 1968 , 37 ], [ \"summer\" , 2002 , 77 ], [ \"summer\" , 1948 , 59 ]] train_label = [ \"France\" , \"USA\" , \"USA\" , \"UK\" ] eval_label = [ \"USA\" , \"France\" , \"USA\" , \"UK\" ] # Initialize CatBoostClassifier model = CatBoostClassifier ( iterations = 10 , learning_rate = 1 , depth = 2 , cat_features = [ 0 ], loss_function = 'MultiClass' ) # Fit model model . fit ( train_data , train_label ) # Get predicted classes preds_class = model . predict ( eval_data ) # Get predicted probabilities for each class preds_proba = model . 
predict_proba ( eval_data ) # Get predicted RawFormulaVal preds_raw = model . predict ( eval_data , prediction_type = 'RawFormulaVal' ) \u53c3\u8003","title":"\u81ea\u52d5\u8655\u7406\u985e\u5225\u578b\u7684\u7279\u5fb5"},{"location":"18.CatBoost/#verbose","text":"\u8a13\u7df4\u904e\u7a0b\u4e2d\u53ef\u4ee5\u96a8\u6642\u89c0\u5bdf\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684loss\uff0c\u4f7f\u7528verbose=10\u5373\u4ee3\u8868\u6bcf10\u6b21\u8fed\u4ee3\u6703\u986f\u793a\u4e00\u6b21\u8cc7\u8a0a\uff0c\u9019\u7a2e\u65b9\u5f0f\u4e5f\u89e3\u6c7a\u6bcf\u6b21\u758a\u4ee3\u986f\u793a\u4e00\u6b21\u7684\u56f0\u64fe\u3002\u8a13\u7df4\u904e\u7a0b\u4e2d\u5269\u9918\u6642\u9593\u4e5f\u6703\u986f\u793a\u51fa\u4f86\u3002","title":"\u5584\u7528 Verbose"},{"location":"18.CatBoost/#_6","text":"CatBoost \u63d0\u4f9b\u4e86 plot \u53ef\u4ee5\u65b9\u4fbf\u5728\u8a13\u7df4\u6642\u67e5\u770b\u4e26\u4f5c\u5373\u6642\u5206\u6790\u8a13\u7df4\u8da8\u52e2\u3002\u9664\u6b64\u4e4b\u5916 CatBoost \u4e5f\u652f\u63f4 SHAP \u589e\u52a0\u4e86\u6a21\u578b\u53ef\u89e3\u91cb\u3002\u8a73\u7d30\u7684\u4f7f\u7528\u65b9\u5f0f\u53ef\u4ee5\u53c3\u8003\u5b98\u65b9 \u6559\u5b78 \u3002","title":"\u6a21\u578b\u7684\u89e3\u91cb"},{"location":"18.CatBoost/#_7","text":"CatBoost \u7684\u512a\u9ede\u548c\u4f7f\u7528\u65b9\u6cd5\u90fd\u4ecb\u7d39\u5b8c\u4e86\uff0c\u662f\u4e0d\u662f\u89ba\u5f97\u5341\u5206\u7c21\u55ae\u6613\u7528\u4e14\u529f\u80fd\u5f37\u5927\u3002\u5c24\u5176\u662f\u9047\u5230\u8cc7\u6599\u9700\u8981\u5927\u91cf\u8655\u7406\u548c\u7279\u5fb5\u6578\u503c\u5316\u7684\u4efb\u52d9\u6642\u6700\u9069\u5408\u4f7f\u7528 CatBoost \u4e86\u3002\u771f\u7684\u662f\u6240\u8b02\u7684\u61f6\u4eba\u5957\u4ef6\uff0c\u540d\u7b26\u5176\u5be6\u7684 Ying Train Yi Fa(\u786cTrain\u4e00\u767c)!","title":"\u5c0f\u7d50"},{"location":"18.CatBoost/#reference","text":"Tutorial: CatBoost Overview SHAP Catboost tutorial CatBoost regression in 6 minutes 
Catboost\uff1a\u8d85\u8d8aLightgbm\u548cXGBoost\u7684\u53c8\u4e00\u500bboost\u7b97\u6cd5\u795e\u5668 CatBoost\u3001LightGBM\u3001XGBoost\uff0c\u9019\u4e9b\u7b97\u6cd5\u4f60\u90fd\u77ad\u89e3\u55ce\uff1f \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"19.AutoML/","text":"[Day 19] \u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2 - AutoML \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u4e86\u89e3\u4f55\u8b02 AutoML \u8d85\u53c3\u6578\u8abf\u53c3\u65b9\u6cd5 Grid Search Random Search Bayesian Optimization AutoML \u7684\u52d5\u6a5f \u5927\u5bb6\u9084\u8a18\u5f97\u5728 [Day 5] \u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16 \u4e2d\u6709\u63d0\u5230\u5b8c\u6574\u7684\u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b\u5927\u81f4\u5206\u6210\u516b\u500b\u6b65\u9a5f\u3002\u7136\u800c\u6a21\u578b\u7684\u8a13\u7df4\u8207\u8d85\u53c3\u6578\u8abf\u6574\u50c5\u626e\u6f14\u5176\u4e2d\u7684\u4e00\u74b0\uff0c\u9078\u64c7\u4e00\u500b\u597d\u7684\u6a21\u578b\u662f\u4ef6\u91cd\u8981\u7684\u4e8b\u60c5\u3002\u60f3\u5fc5\u5927\u5bb6\u5728\u8a13\u7df4\u6a21\u578b\u6642\u4e00\u5b9a\u6703\u9047\u5230\u4e00\u500b\u68d8\u624b\u7684\u554f\u984c\uff0c\u5c31\u662f\u8a72\u5982\u4f55\u6b63\u78ba\u9078\u64c7\u6a21\u578b\u4ee5\u53ca\u8abf\u6574\u8d85\u53c3\u6578\uff1f\u96a8\u8457\u8d8a\u4f86\u8d8a\u591a\u7684\u6f14\u7b97\u6cd5\u4e0d\u65b7\u5730\u88ab\u958b\u767c\u51fa\u4f86\uff0c\u8981\u5f9e\u832b\u832b\u5927\u6d77\u4e2d\u6311\u9078\u4e00\u500b\u5408\u9069\u7684\u6a21\u578b\u662f\u4ef6\u8017\u6642\u7684\u4e8b\u3002\u56e0\u6b64\u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2 (Automated Machine Learning ,AutoML) \u53ef\u4ee5\u5e6b\u52a9\u6211\u5011\u5728\u6709\u9650\u7684\u6642\u9593\u5167\u627e\u51fa\u4e00\u500b\u6eff\u610f\u7684\u6a21\u578b\u3002\u5728\u8fd1\u5e74\u4f86\u6709\u8a31\u591a\u4eba\u958b\u59cb\u7814\u7a76\u9019\u985e\u7684\u554f\u984c\uff0c\u7b46\u8005\u5f59\u6574\u4e86\u5e7e\u500b Python \u71b1\u9580\u7684 
AutoML \u958b\u6e90\u5957\u4ef6\uff1a AutoGluon Auto-sklearn FLAML H2O AutoML LightAutoML Pycaret MLJAR TPOT MLBox Auto-PyTorch AutoKeras talos AutoML \u626e\u6f14\u7684\u89d2\u8272 \u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2\u63d0\u4f9b\u4e86\u4e00\u7cfb\u5217\u7684\u65b9\u6cd5\u548c\u81ea\u52d5\u5316\u7684\u5b78\u7fd2\u6d41\u7a0b\uff0c\u4ee5\u63d0\u9ad8\u6a5f\u5668\u5b78\u7fd2\u7684\u6548\u7387\u4e26\u52a0\u901f\u6a5f\u5668\u5b78\u7fd2\u7684\u7814\u7a76\u3002\u900f\u904e AutoML \u96c6\u7d50\u5c08\u5bb6\u7684\u5148\u9a57\u77e5\u8b58\uff0c\u5927\u5e45\u964d\u4f4e\u4e86\u6a5f\u5668\u5b78\u7fd2\u5efa\u6a21\u7684\u56f0\u96e3\u5ea6\u3002\u96d6\u7136\u9818\u57df\u5c08\u5bb6\u8207 AI \u5de5\u7a0b\u5e2b\u5fc5\u7136\u626e\u6f14\u91cd\u8981\u7684\u89d2\u8272\uff0c\u4f46\u662f\u8fd1\u5e74\u4f86 No Code \u7121\u7a0b\u5f0f\u78bc\u958b\u767c\u5e73\u53f0\u5f62\u6210\u4e00\u80a1\u6f6e\u6d41\u3002AI \u518d\u4e5f\u4e0d\u662f\u9700\u8981\u8cc7\u8a0a\u80cc\u666f\u7684\u4eba\u624d\u80fd\u505a\u7684\u4e8b\uff0c\u76ee\u7684\u662f\u8b93\u5927\u5bb6\u4e0d\u7528\u900f\u904e\u5beb\u7a0b\u5f0f\u4e5f\u80fd\u5feb\u901f\u5730\u9032\u884c\u8cc7\u6599\u63a2\u7d22\u8207\u5efa\u7acb\u9810\u6e2c\u6a21\u578b\u3002\u7136\u800c\u8fd1\u5e74\u4f86\u8a31\u591a\u4f01\u696d\u958b\u767c\u4e86\u5404\u7a2e\u9700\u6c42\u7684 AutoML \u5e73\u53f0\uff0c\u5982\u96e8\u5f8c\u6625\u96e8\u822c\u7684\u51fa\u73fe\uff1a Google: Cloud AutoML Microsoft: Azure Machine Learning Amazon: SageMaker Autopilot Landing AI: LandingLens Chimes AI: tukey AutoML \u80fd\u5e6b\u52a9\u591a\u5c11\u4e8b\u60c5 
\u5178\u578b\u7684\u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b\u662f\u4e00\u500b\u8fed\u4ee3\u7684\u5faa\u74b0\u9031\u671f\uff0c\u5f9e\u5b9a\u7fa9\u554f\u984c\u3001\u8cc7\u6599\u6536\u96c6\u8207\u8655\u7406\u3001\u6a21\u578b\u8a2d\u8a08\u5230\u6700\u7d42\u6a21\u578b\u90e8\u7f72\uff0c\u6bcf\u500b\u6b65\u9a5f\u6975\u70ba\u91cd\u8981\u4e14\u7f3a\u4e00\u4e0d\u53ef\u3002\u6b64\u5916\u4e00\u500b\u597d\u7684\u6a5f\u5668\u5b78\u7fd2\u7684\u5c08\u6848\u9700\u8981\u57f7\u884c MLOps \u7684\u6d41\u7a0b\uff0c\u624d\u80fd\u5920\u8b93\u6a21\u578b\u5728\u5be6\u969b\u61c9\u7528\u5834\u666f\u8d8a\u4f86\u8d8a\u597d\u771f\u5be6\u5730\u89e3\u6c7a\u554f\u984c\u3002MLOps \u6307\u7684\u662f\u5f9e AI \u6a21\u578b\u8a13\u7df4\u5230\u90e8\u7f72\u4e0a\u7dda\u7684\u4e00\u5957\u5b8c\u6574\u6a5f\u5668\u5b78\u7fd2\u5de5\u4f5c\u6d41\u7a0b\uff0c\u8fd1\u5e74\u4f86\u9019\u4e00\u540d\u8a5e\u975e\u5e38\u71b1\u9580\uff0c\u5b83\u5176\u5be6\u5c31\u662f ML (\u6a5f\u5668\u5b78\u7fd2) \u8207 DevOps (\u958b\u767c\u8207\u7dad\u904b) \u7684\u7d50\u5408\u3002\u5982\u4e0b\u5716\u6240\u793a\u5f9e\u8a13\u7df4\u6a21\u578b\u5230\u6b63\u5f0f\u90e8\u7f72\u4e2d\u9593\u9084\u6709\u8a31\u591a\u4e8b\u60c5\u8981\u8655\u7406\uff0c\u800c\u6a21\u578b\u4e0a\u7dda\u5f8c\u9084\u662f\u8981\u6301\u7e8c\u76e3\u63a7\u4e26\u6536\u96c6\u65b0\u7684\u5834\u57df\u8cc7\u6599\u3002\u6700\u5f8c\u5c07\u8cc7\u6599\u6536\u96c6\u5230\u4e00\u5b9a\u7a0b\u5ea6\uff0c\u53c8\u56de\u5230\u9031\u671f\u7684\u7b2c\u4e00\u6b65\u91cd\u65b0\u8a13\u7df4\u65b0\u6a21\u578b\u3002\u81f3\u65bc\u6a21\u578b\u8a72\u5982\u4f55\u91cd\u65b0\u8a13\u7df4\u4e26\u4fdd\u6301\u8cc7\u6599\u7684\u96b1\u79c1\u6027\u5c31\u662f\u53e6\u4e00\u9580\u8b70\u984c\u3002\u9019\u6642\u5019\u6211\u5011\u5c31\u80fd\u63a1\u7528\u4e00\u500b\u6280\u8853\u53eb\u505a Federated Learning (\u806f\u5408\u5b78\u7fd2) \u60f3\u8fa6\u6cd5\u8655\u7406\u9019\u985e\u7684\u4e8b\u60c5\u3002 \u4f46\u662f\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u8a13\u7df4\u4e00\u500b\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\uff0c\u5728 MLOps 
\u7684\u9031\u671f\u4e2d\u50c5\u626e\u6f14\u5c0f\u5c0f\u7684\u4e00\u584a\u89d2\u8272\u3002\u4e0b\u5716\u662f\u4e00\u500b\u8a13\u7df4\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u7684\u57fa\u672c\u6d41\u7a0b\uff0c\u4e2d\u9593\u6a58\u8272\u7684\u90e8\u5206\u5c31\u662f AutoML \u53ef\u4ee5\u5e6b\u52a9\u6211\u5011\u7684\u4e8b\u3002\u5f9e\u8cc7\u6599\u524d\u8655\u7406\u3001\u8a13\u7df4\u6a21\u578b\u5230\u8a55\u4f30\u6a21\u578b\u9700\u8981\u4e0d\u65b7\u5730\u7684\u9032\u884c\u8a66\u9a57\uff0c\u4e26\u4e14\u5617\u8a66\u5404\u7a2e\u4e0d\u540c\u7684\u6a21\u578b\u6f14\u7b97\u6cd5\u8207\u6a21\u578b\u8d85\u53c3\u6578\u3002\u9664\u6b64\u4e4b\u5916\u9084\u6709\u8cc7\u6599\u524d\u8655\u7406\u8207\u7279\u5fb5\u5de5\u7a0b\uff0c\u90fd\u53ef\u4ee5\u900f\u904e AutoML \u81ea\u52d5\u5316\u7684\u8a13\u7df4\u627e\u5230\u4e00\u500b\u6eff\u610f\u7684\u6a21\u578b\u3002 \u8d85\u53c3\u6578\u8abf\u53c3\u65b9\u6cd5 \u6a5f\u5668\u5b78\u7fd2\u81ea\u52d5\u5316\u7684\u56f0\u96e3\u9ede\u5728\u65bc\u8cc7\u6599\u6e05\u6d17\u8207\u7279\u5fb5\u5de5\u7a0b\u6280\u5de7\u3002\u4e00\u500b\u597d\u7684\u7279\u5fb5\u8868\u9054\u53ef\u4ee5\u8b93\u6a21\u578b\u5feb\u901f\u5730\u6293\u5230\u95dc\u9375\u56e0\u5b50\uff0c\u4e26\u8b93\u6a21\u578b\u9810\u6e2c\u80fd\u529b\u63d0\u5347\u3002\u6176\u5e78\u7684\u662f\u6a21\u578b\u6311\u9078\u548c\u8d85\u53c3\u6578\u8abf\u6574\u5df2\u7d93\u6709\u6bd4\u8f03\u6210\u719f\u7684\u65b9\u6cd5\u53ef\u4ee5\u5354\u52a9\u6211\u5011\u6709\u6548\u7684\u641c\u5c0b\u3002 Grid Search \u7db2\u683c\u641c\u7d22/\u7aae\u8209\u641c\u7d22 Random Search \u96a8\u6a5f\u641c\u7d22 Bayesian Optimization \u8c9d\u8449\u65af\u512a\u5316 Grid Search Grid Search (\u7db2\u683c\u641c\u7d22) 
\u53c8\u7a31\u7aae\u8209\u641c\u7d22\u3002\u5b83\u7684\u641c\u7d22\u65b9\u5f0f\u662f\u5728\u6240\u6709\u53ef\u80fd\u7684\u53c3\u6578\u4e2d\uff0c\u900f\u904e\u6392\u5217\u7d44\u5408\u5617\u8a66\u6bcf\u4e00\u7a2e\u53ef\u80fd\u6027\u3002\u4e26\u5c07\u8868\u73fe\u6700\u597d\u7684\u53c3\u6578\u6700\u70ba\u6700\u7d42\u7684\u8d85\u53c3\u6578\u641c\u5c0b\u7d50\u679c\u3002\u4ed6\u7684\u7f3a\u9ede\u5c31\u662f\u7576\u6709\u8a31\u591a\u8d85\u53c3\u6578\u8981\u5c0b\u627e\u6642\uff0c\u4ed6\u7684\u6392\u5217\u7d44\u5408\u5c31\u6703\u8b8a\u5f97\u975e\u5e38\u591a\uff0c\u5c0e\u81f4\u641c\u7d22\u7684\u6642\u9593\u8b8a\u9577\u82b1\u8cbb\u7684\u8cc7\u6e90\u4e5f\u8b8a\u5927\u3002\u56e0\u6b64\u9019\u7a2e\u66b4\u529b\u5f0f\u7684\u641c\u7d22\u65b9\u6cd5\u9069\u5408\u5728\u5c0f\u7684\u8cc7\u6599\u96c6\u4e0a\u88ab\u63a1\u7528\u3002\u7136\u800c\u5728 Sklearn \u5957\u4ef6\u4e2d\u6709\u63d0\u4f9b GridSearchCV \u65b9\u6cd5\uff0c\u4f7f\u7528\u8005\u53ef\u4ee5\u81ea\u5df1\u8a2d\u5b9a\u53c3\u6578\u5217\u8868\uff0c\u4e26\u900f\u904e\u6240\u6709\u53ef\u80fd\u7684\u53c3\u6578\u7d44\u5408\u4e00\u500b\u4e00\u500b\u5617\u8a66\u627e\u5230\u6700\u5408\u9069\u7684\u53c3\u6578\u3002 from sklearn import svm , datasets from sklearn.model_selection import GridSearchCV # \u8f09\u5165\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6 iris = datasets . load_iris () # \u8a2d\u5b9a\u60f3\u8981\u7684\u641c\u7d22\u53c3\u6578\u4e26\u7d66\u4e88\u5019\u9078\u503c parameters = { 'kernel' :( 'linear' , 'rbf' ), 'C' :[ 1 , 10 ]} # \u5efa\u7acb SVC \u5206\u985e\u5668 svc = svm . SVC () # \u7db2\u683c\u641c\u7d22\u6240\u6709\u53ef\u80fd\u7684\u7d44\u5408(2*2)\u5171\u56db\u7a2e clf = GridSearchCV ( svc , parameters ) # \u64ec\u5408\u6578\u64da\u4e26\u56de\u50b3\u6700\u4f73\u6a21\u578b clf . fit ( iris . data , iris . target ) \u641c\u7d22\u7d50\u675f\u5f8c\u4e5f\u80fd\u5920\u904e cv_results_ \u67e5\u770b\u6240\u6709\u7d44\u5408\u7684\u8d85\u53c3\u6578\u6240\u5c0d\u61c9\u7684\u8a13\u7df4\u7d50\u679c\u3002 clf . 
cv_results_ Random Search Random Search (\u96a8\u6a5f\u641c\u7d22) \u6309\u7167\u5b57\u9762\u4e0a\u7684\u610f\u601d\u5c31\u662f\u5728\u6240\u6709\u53ef\u80fd\u7684\u5019\u9078\u53c3\u6578\u4e2d\u96a8\u6a5f\u6311\u9078\u4e00\u500b\u6578\u503c\u4e26\u5617\u8a66\u3002\u5982\u679c\u9700\u8981\u8abf\u7684\u53c3\u6578\u8f03\u591a\u7684\u6642\u5019\uff0c\u4f7f\u7528\u96a8\u6a5f\u641c\u7d22\u53ef\u4ee5\u964d\u4f4e\u641c\u7d22\u6642\u9593\uff0c\u540c\u6642\u53c8\u80fd\u78ba\u4fdd\u4e00\u5b9a\u7684\u6a21\u578b\u6e96\u78ba\u6027\u3002\u5728 Sklearn \u5957\u4ef6\u4e2d\u4e5f\u6709\u63d0\u4f9b RandomizedSearchCV \u65b9\u6cd5\u53ef\u4ee5\u547c\u53eb\uff0c\u8207\u7db2\u683c\u641c\u7d22\u7684\u5dee\u5225\u5728\u65bc\u4f7f\u7528\u8005\u53ef\u4ee5\u5c07\u6b32\u641c\u5c0b\u7684\u8d85\u53c3\u6578\u8a2d\u5b9a\u4e00\u500b\u671f\u671b\u7684\u7bc4\u570d\u3002\u8a72\u65b9\u6cd5\u6703\u5728\u6b64\u7bc4\u570d\u4e2d\u96a8\u6a5f\u62bd\u4e00\u500b\u6578\u503c\u4e26\u9032\u884c\u6a21\u578b\u8a13\u7df4\u4e26\u9a57\u8b49\u6a21\u578b\u3002\u4e26\u627e\u51fa\u6240\u6709\u96a8\u6a5f\u7d44\u5408\u4e2d\u8868\u73fe\u6700\u597d\u7684\u4e00\u7d44\u8d85\u53c3\u6578\u3002 from sklearn.datasets import load_iris from sklearn.linear_model import LogisticRegression from sklearn.model_selection import RandomizedSearchCV from scipy.stats import uniform # \u8f09\u5165\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6 iris = load_iris () # \u5efa\u7acb\u908f\u8f2f\u8ff4\u6b78\u6a21\u578b logistic = LogisticRegression ( solver = 'saga' , tol = 1e-2 , max_iter = 200 , random_state = 0 ) # \u8a2d\u5b9a\u6b32\u641c\u5c0b\u7684\u8d85\u53c3\u6578\u4e26\u7d66\u4e88\u4e00\u500b\u671f\u671b\u7684\u7bc4\u570d distributions = dict ( C = uniform ( loc = 0 , scale = 4 ), penalty = [ 'l2' , 'l1' ]) # \u96a8\u6a5f\u641c\u7d22\u9810\u8a2d n_iter=10 clf = RandomizedSearchCV ( logistic , distributions , random_state = 0 , n_iter = 10 ) # \u64ec\u5408\u6578\u64da\u4e26\u56de\u50b3\u6700\u4f73\u6a21\u578b search = clf . fit ( iris . 
data , iris . target ) search . best_params_ Bayesian Optimization Bayesian Optimization (\u8c9d\u8449\u65af\u512a\u5316) \u76ee\u6a19\u662f\u8981\u5728\u6700\u5c11\u7684\u8a66\u9a57\u4e0b\u5c0b\u627e\u4e00\u7d44\u6700\u4f73\u7684\u8d85\u53c3\u6578\u4f7f\u5f97\u932f\u8aa4\u7387\u80fd\u5920\u8d8a\u4f4e\u8d8a\u597d\u3002\u7531\u65bc\u6211\u5011\u6240\u6536\u96c6\u5230\u7684\u8cc7\u6599\u7121\u5f9e\u5f97\u77e5\u8a72\u6a21\u578b\u7684\u76ee\u6a19\u51fd\u6578\u662f\u9577\u600e\u6a23\uff0c\u56e0\u6b64\u6a5f\u5668\u5b78\u7fd2\u7684\u76ee\u7684\u5c31\u662f\u8981\u5f9e\u9019\u4e9b\u8cc7\u6599\u4e2d\u53bb\u64ec\u5408\u4e00\u500b\u51fd\u6578\uff0c\u76ee\u6a19\u662f\u7d66\u4e88\u4e00\u7b46\u8f38\u5165 X \u8a72\u51fd\u6578\u7684\u8f38\u51fa\u8981\u8207\u771f\u5be6\u7684\u7b54\u6848\u8d8a\u63a5\u8fd1\u8d8a\u597d\u3002\u900f\u904e\u4ee3\u7406\u512a\u5316 (surrogate optimization) \u4f7f\u7528\u4e00\u500b\u4ee3\u7406\u51fd\u6578\u4f86\u4f30\u8a08\u76ee\u6a19\u51fd\u6578\u3002\u7c21\u55ae\u4f86\u8aaa\u4ee3\u7406\u51fd\u6578\u662f\u6307\u76ee\u6a19\u51fd\u6578\u7684\u4e00\u7a2e\u8fd1\u4f3c\uff0c\u6b64\u5916\u4ee3\u7406\u51fd\u6578\u53ef\u57fa\u65bc\u53d6\u6a23\u5f97\u5230\u7684\u8cc7\u6599\u9ede\u88ab\u69cb\u5efa\u51fa\u4f86\u3002 \u4ee3\u7406\u51fd\u6578\u7684\u76ee\u7684\u662f\u5728\u7d66\u5b9a\u4e00\u7d44\u7279\u5b9a\u7684\u5019\u9078\u8d85\u53c3\u6578\u7684\u60c5\u6cc1\u4e0b\u5feb\u901f\u4f30\u8a08\u5be6\u969b\u6a21\u578b\u7684\u932f\u8aa4\u7387\u3002\u900f\u904e\u9019\u7a2e\u65b9\u5f0f\u53ef\u4ee5\u5feb\u901f\u6c7a\u5b9a\u8a72\u7d44\u8d85\u53c3\u6578\u662f\u5426\u53ef\u4ee5\u88ab\u62ff\u4f86\u5be6\u969b\u8a13\u7df4\u6a21\u578b\u3002\u96a8\u8457\u8a66\u9a57\u6b21\u6578\u7684\u589e\u52a0\uff0c\u4ee3\u7406\u51fd\u6578\u96a8\u8457\u5148\u524d\u7684\u8a66\u9a57\u7d50\u679c\u800c\u66f4\u65b0\u6539\u9032\uff0c\u4e26\u958b\u59cb\u63a8\u85a6\u66f4\u597d\u7684\u5019\u9078\u8d85\u53c3\u6578\u3002 Auto-sklearn 
\u5c31\u662f\u4e00\u500b\u900f\u904e\u8c9d\u8449\u65af\u512a\u5316\u4f86\u5c0b\u627e\u6700\u4f73\u8d85\u53c3\u6578\u7684\u4e00\u500b\u5de5\u5177\u3002\u540c\u6642\u5b83\u4e5f\u80fd\u641c\u7d22\u5728 Sklearn \u4e2d\u6240\u6709\u53ef\u80fd\u7684\u7b97\u6cd5\uff0c\u4e26\u70ba\u4f60\u63a8\u85a6\u4e00\u500b\u5408\u9069\u7684\u6a21\u578b\u8207\u8cc7\u6599\u524d\u8655\u7406\u65b9\u5f0f\u3002\u660e\u5929\u6211\u5011\u5c31\u4f86\u4e00\u63a2\u7a76\u7adf\u8a72\u5957\u4ef6\u80cc\u5f8c\u7684\u795e\u79d8\u539f\u7406\u4ee5\u53ca\u7a0b\u5f0f\u5be6\u4f5c\u5427\uff01 Reference automl.org Sklearn \u5b98\u65b9\u6587\u4ef6 GridSearchCV Sklearn \u5b98\u65b9\u6587\u4ef6 RandomizedSearchCV \u7528\u7c21\u55ae\u8853\u8a9e\u8b93\u4f60\u770b\u5230\u8c9d\u8449\u65af\u512a\u5316\u4e4b\u7f8e \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 19] \u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2 - AutoML"},{"location":"19.AutoML/#day-19-automl","text":"","title":"[Day 19] \u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2 - AutoML"},{"location":"19.AutoML/#_1","text":"\u4e86\u89e3\u4f55\u8b02 AutoML \u8d85\u53c3\u6578\u8abf\u53c3\u65b9\u6cd5 Grid Search Random Search Bayesian Optimization","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"19.AutoML/#automl","text":"\u5927\u5bb6\u9084\u8a18\u5f97\u5728 [Day 5] \u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16 
\u4e2d\u6709\u63d0\u5230\u5b8c\u6574\u7684\u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b\u5927\u81f4\u5206\u6210\u516b\u500b\u6b65\u9a5f\u3002\u7136\u800c\u6a21\u578b\u7684\u8a13\u7df4\u8207\u8d85\u53c3\u6578\u8abf\u6574\u50c5\u626e\u6f14\u5176\u4e2d\u7684\u4e00\u74b0\uff0c\u9078\u64c7\u4e00\u500b\u597d\u7684\u6a21\u578b\u662f\u4ef6\u91cd\u8981\u7684\u4e8b\u60c5\u3002\u60f3\u5fc5\u5927\u5bb6\u5728\u8a13\u7df4\u6a21\u578b\u6642\u4e00\u5b9a\u6703\u9047\u5230\u4e00\u500b\u68d8\u624b\u7684\u554f\u984c\uff0c\u5c31\u662f\u8a72\u5982\u4f55\u6b63\u78ba\u9078\u64c7\u6a21\u578b\u4ee5\u53ca\u8abf\u6574\u8d85\u53c3\u6578\uff1f\u96a8\u8457\u8d8a\u4f86\u8d8a\u591a\u7684\u6f14\u7b97\u6cd5\u4e0d\u65b7\u5730\u88ab\u958b\u767c\u51fa\u4f86\uff0c\u8981\u5f9e\u832b\u832b\u5927\u6d77\u4e2d\u6311\u9078\u4e00\u500b\u5408\u9069\u7684\u6a21\u578b\u662f\u4ef6\u8017\u6642\u7684\u4e8b\u3002\u56e0\u6b64\u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2 (Automated Machine Learning ,AutoML) \u53ef\u4ee5\u5e6b\u52a9\u6211\u5011\u5728\u6709\u9650\u7684\u6642\u9593\u5167\u627e\u51fa\u4e00\u500b\u6eff\u610f\u7684\u6a21\u578b\u3002\u5728\u8fd1\u5e74\u4f86\u6709\u8a31\u591a\u4eba\u958b\u59cb\u7814\u7a76\u9019\u985e\u7684\u554f\u984c\uff0c\u7b46\u8005\u5f59\u6574\u4e86\u5e7e\u500b Python \u71b1\u9580\u7684 AutoML \u958b\u6e90\u5957\u4ef6\uff1a AutoGluon Auto-sklearn FLAML H2O AutoML LightAutoML Pycaret MLJAR TPOT MLBox Auto-PyTorch AutoKeras talos","title":"AutoML \u7684\u52d5\u6a5f"},{"location":"19.AutoML/#automl_1","text":"\u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2\u63d0\u4f9b\u4e86\u4e00\u7cfb\u5217\u7684\u65b9\u6cd5\u548c\u81ea\u52d5\u5316\u7684\u5b78\u7fd2\u6d41\u7a0b\uff0c\u4ee5\u63d0\u9ad8\u6a5f\u5668\u5b78\u7fd2\u7684\u6548\u7387\u4e26\u52a0\u901f\u6a5f\u5668\u5b78\u7fd2\u7684\u7814\u7a76\u3002\u900f\u904e AutoML \u96c6\u7d50\u5c08\u5bb6\u7684\u5148\u9a57\u77e5\u8b58\uff0c\u5927\u5e45\u964d\u4f4e\u4e86\u6a5f\u5668\u5b78\u7fd2\u5efa\u6a21\u7684\u56f0\u96e3\u5ea6\u3002\u96d6\u7136\u9818\u57df\u5c08\u5bb6\u8207 AI 
\u5de5\u7a0b\u5e2b\u5fc5\u7136\u626e\u6f14\u91cd\u8981\u7684\u89d2\u8272\uff0c\u4f46\u662f\u8fd1\u5e74\u4f86 No Code \u7121\u7a0b\u5f0f\u78bc\u958b\u767c\u5e73\u53f0\u5f62\u6210\u4e00\u80a1\u6f6e\u6d41\u3002AI \u518d\u4e5f\u4e0d\u662f\u9700\u8981\u8cc7\u8a0a\u80cc\u666f\u7684\u4eba\u624d\u80fd\u505a\u7684\u4e8b\uff0c\u76ee\u7684\u662f\u8b93\u5927\u5bb6\u4e0d\u7528\u900f\u904e\u5beb\u7a0b\u5f0f\u4e5f\u80fd\u5feb\u901f\u5730\u9032\u884c\u8cc7\u6599\u63a2\u7d22\u8207\u5efa\u7acb\u9810\u6e2c\u6a21\u578b\u3002\u7136\u800c\u8fd1\u5e74\u4f86\u8a31\u591a\u4f01\u696d\u958b\u767c\u4e86\u5404\u7a2e\u9700\u6c42\u7684 AutoML \u5e73\u53f0\uff0c\u5982\u96e8\u5f8c\u6625\u7b4d\u822c\u7684\u51fa\u73fe\uff1a Google: Cloud AutoML Microsoft: Azure Machine Learning Amazon: SageMaker Autopilot Landing AI: LandingLens Chimes AI: tukey","title":"AutoML \u626e\u6f14\u7684\u89d2\u8272"},{"location":"19.AutoML/#automl_2","text":"\u5178\u578b\u7684\u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b\u662f\u4e00\u500b\u8fed\u4ee3\u7684\u5faa\u74b0\u9031\u671f\uff0c\u5f9e\u5b9a\u7fa9\u554f\u984c\u3001\u8cc7\u6599\u6536\u96c6\u8207\u8655\u7406\u3001\u6a21\u578b\u8a2d\u8a08\u5230\u6700\u7d42\u6a21\u578b\u90e8\u7f72\uff0c\u6bcf\u500b\u6b65\u9a5f\u6975\u70ba\u91cd\u8981\u4e14\u7f3a\u4e00\u4e0d\u53ef\u3002\u6b64\u5916\u4e00\u500b\u597d\u7684\u6a5f\u5668\u5b78\u7fd2\u7684\u5c08\u6848\u9700\u8981\u57f7\u884c MLOps \u7684\u6d41\u7a0b\uff0c\u624d\u80fd\u5920\u8b93\u6a21\u578b\u5728\u5be6\u969b\u61c9\u7528\u5834\u666f\u8d8a\u4f86\u8d8a\u597d\u771f\u5be6\u5730\u89e3\u6c7a\u554f\u984c\u3002MLOps \u6307\u7684\u662f\u5f9e AI \u6a21\u578b\u8a13\u7df4\u5230\u90e8\u7f72\u4e0a\u7dda\u7684\u4e00\u5957\u5b8c\u6574\u6a5f\u5668\u5b78\u7fd2\u5de5\u4f5c\u6d41\u7a0b\uff0c\u8fd1\u5e74\u4f86\u9019\u4e00\u540d\u8a5e\u975e\u5e38\u71b1\u9580\uff0c\u5b83\u5176\u5be6\u5c31\u662f ML (\u6a5f\u5668\u5b78\u7fd2) \u8207 DevOps (\u958b\u767c\u8207\u7dad\u904b) 
\u7684\u7d50\u5408\u3002\u5982\u4e0b\u5716\u6240\u793a\u5f9e\u8a13\u7df4\u6a21\u578b\u5230\u6b63\u5f0f\u90e8\u7f72\u4e2d\u9593\u9084\u6709\u8a31\u591a\u4e8b\u60c5\u8981\u8655\u7406\uff0c\u800c\u6a21\u578b\u4e0a\u7dda\u5f8c\u9084\u662f\u8981\u6301\u7e8c\u76e3\u63a7\u4e26\u6536\u96c6\u65b0\u7684\u5834\u57df\u8cc7\u6599\u3002\u6700\u5f8c\u5c07\u8cc7\u6599\u6536\u96c6\u5230\u4e00\u5b9a\u7a0b\u5ea6\uff0c\u53c8\u56de\u5230\u9031\u671f\u7684\u7b2c\u4e00\u6b65\u91cd\u65b0\u8a13\u7df4\u65b0\u6a21\u578b\u3002\u81f3\u65bc\u6a21\u578b\u8a72\u5982\u4f55\u91cd\u65b0\u8a13\u7df4\u4e26\u4fdd\u6301\u8cc7\u6599\u7684\u96b1\u79c1\u6027\u5c31\u662f\u53e6\u4e00\u9580\u8b70\u984c\u3002\u9019\u6642\u5019\u6211\u5011\u5c31\u80fd\u63a1\u7528\u4e00\u500b\u6280\u8853\u53eb\u505a Federated Learning (\u806f\u5408\u5b78\u7fd2) \u60f3\u8fa6\u6cd5\u8655\u7406\u9019\u985e\u7684\u4e8b\u60c5\u3002 \u4f46\u662f\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u8a13\u7df4\u4e00\u500b\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\uff0c\u5728 MLOps \u7684\u9031\u671f\u4e2d\u50c5\u626e\u6f14\u5c0f\u5c0f\u7684\u4e00\u584a\u89d2\u8272\u3002\u4e0b\u5716\u662f\u4e00\u500b\u8a13\u7df4\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u7684\u57fa\u672c\u6d41\u7a0b\uff0c\u4e2d\u9593\u6a58\u8272\u7684\u90e8\u5206\u5c31\u662f AutoML \u53ef\u4ee5\u5e6b\u52a9\u6211\u5011\u7684\u4e8b\u3002\u5f9e\u8cc7\u6599\u524d\u8655\u7406\u3001\u8a13\u7df4\u6a21\u578b\u5230\u8a55\u4f30\u6a21\u578b\u9700\u8981\u4e0d\u65b7\u5730\u7684\u9032\u884c\u8a66\u9a57\uff0c\u4e26\u4e14\u5617\u8a66\u5404\u7a2e\u4e0d\u540c\u7684\u6a21\u578b\u6f14\u7b97\u6cd5\u8207\u6a21\u578b\u8d85\u53c3\u6578\u3002\u9664\u6b64\u4e4b\u5916\u9084\u6709\u8cc7\u6599\u524d\u8655\u7406\u8207\u7279\u5fb5\u5de5\u7a0b\uff0c\u90fd\u53ef\u4ee5\u900f\u904e AutoML \u81ea\u52d5\u5316\u7684\u8a13\u7df4\u627e\u5230\u4e00\u500b\u6eff\u610f\u7684\u6a21\u578b\u3002","title":"AutoML 
\u80fd\u5e6b\u52a9\u591a\u5c11\u4e8b\u60c5"},{"location":"19.AutoML/#_2","text":"\u6a5f\u5668\u5b78\u7fd2\u81ea\u52d5\u5316\u7684\u56f0\u96e3\u9ede\u5728\u65bc\u8cc7\u6599\u6e05\u6d17\u8207\u7279\u5fb5\u5de5\u7a0b\u6280\u5de7\u3002\u4e00\u500b\u597d\u7684\u7279\u5fb5\u8868\u9054\u53ef\u4ee5\u8b93\u6a21\u578b\u5feb\u901f\u5730\u6293\u5230\u95dc\u9375\u56e0\u5b50\uff0c\u4e26\u8b93\u6a21\u578b\u9810\u6e2c\u80fd\u529b\u63d0\u5347\u3002\u6176\u5e78\u7684\u662f\u6a21\u578b\u6311\u9078\u548c\u8d85\u53c3\u6578\u8abf\u6574\u5df2\u7d93\u6709\u6bd4\u8f03\u6210\u719f\u7684\u65b9\u6cd5\u53ef\u4ee5\u5354\u52a9\u6211\u5011\u6709\u6548\u7684\u641c\u5c0b\u3002 Grid Search \u7db2\u683c\u641c\u7d22/\u7aae\u8209\u641c\u7d22 Random Search \u96a8\u6a5f\u641c\u7d22 Bayesian Optimization \u8c9d\u8449\u65af\u512a\u5316","title":"\u8d85\u53c3\u6578\u8abf\u53c3\u65b9\u6cd5"},{"location":"19.AutoML/#grid-search","text":"Grid Search (\u7db2\u683c\u641c\u7d22) \u53c8\u7a31\u7aae\u8209\u641c\u7d22\u3002\u5b83\u7684\u641c\u7d22\u65b9\u5f0f\u662f\u5728\u6240\u6709\u53ef\u80fd\u7684\u53c3\u6578\u4e2d\uff0c\u900f\u904e\u6392\u5217\u7d44\u5408\u5617\u8a66\u6bcf\u4e00\u7a2e\u53ef\u80fd\u6027\u3002\u4e26\u5c07\u8868\u73fe\u6700\u597d\u7684\u53c3\u6578\u4f5c\u70ba\u6700\u7d42\u7684\u8d85\u53c3\u6578\u641c\u5c0b\u7d50\u679c\u3002\u4ed6\u7684\u7f3a\u9ede\u5c31\u662f\u7576\u6709\u8a31\u591a\u8d85\u53c3\u6578\u8981\u5c0b\u627e\u6642\uff0c\u4ed6\u7684\u6392\u5217\u7d44\u5408\u5c31\u6703\u8b8a\u5f97\u975e\u5e38\u591a\uff0c\u5c0e\u81f4\u641c\u7d22\u7684\u6642\u9593\u8b8a\u9577\u82b1\u8cbb\u7684\u8cc7\u6e90\u4e5f\u8b8a\u5927\u3002\u56e0\u6b64\u9019\u7a2e\u66b4\u529b\u5f0f\u7684\u641c\u7d22\u65b9\u6cd5\u9069\u5408\u5728\u5c0f\u7684\u8cc7\u6599\u96c6\u4e0a\u88ab\u63a1\u7528\u3002\u7136\u800c\u5728 Sklearn \u5957\u4ef6\u4e2d\u6709\u63d0\u4f9b GridSearchCV 
\u65b9\u6cd5\uff0c\u4f7f\u7528\u8005\u53ef\u4ee5\u81ea\u5df1\u8a2d\u5b9a\u53c3\u6578\u5217\u8868\uff0c\u4e26\u900f\u904e\u6240\u6709\u53ef\u80fd\u7684\u53c3\u6578\u7d44\u5408\u4e00\u500b\u4e00\u500b\u5617\u8a66\u627e\u5230\u6700\u5408\u9069\u7684\u53c3\u6578\u3002 from sklearn import svm , datasets from sklearn.model_selection import GridSearchCV # \u8f09\u5165\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6 iris = datasets . load_iris () # \u8a2d\u5b9a\u60f3\u8981\u7684\u641c\u7d22\u53c3\u6578\u4e26\u7d66\u4e88\u5019\u9078\u503c parameters = { 'kernel' :( 'linear' , 'rbf' ), 'C' :[ 1 , 10 ]} # \u5efa\u7acb SVC \u5206\u985e\u5668 svc = svm . SVC () # \u7db2\u683c\u641c\u7d22\u6240\u6709\u53ef\u80fd\u7684\u7d44\u5408(2*2)\u5171\u56db\u7a2e clf = GridSearchCV ( svc , parameters ) # \u64ec\u5408\u6578\u64da\u4e26\u56de\u50b3\u6700\u4f73\u6a21\u578b clf . fit ( iris . data , iris . target ) \u641c\u7d22\u7d50\u675f\u5f8c\u4e5f\u80fd\u5920\u900f\u904e cv_results_ \u67e5\u770b\u6240\u6709\u7d44\u5408\u7684\u8d85\u53c3\u6578\u6240\u5c0d\u61c9\u7684\u8a13\u7df4\u7d50\u679c\u3002 clf . 
cv_results_","title":"Grid Search"},{"location":"19.AutoML/#random-search","text":"Random Search (\u96a8\u6a5f\u641c\u7d22) \u6309\u7167\u5b57\u9762\u4e0a\u7684\u610f\u601d\u5c31\u662f\u5728\u6240\u6709\u53ef\u80fd\u7684\u5019\u9078\u53c3\u6578\u4e2d\u96a8\u6a5f\u6311\u9078\u4e00\u500b\u6578\u503c\u4e26\u5617\u8a66\u3002\u5982\u679c\u9700\u8981\u8abf\u7684\u53c3\u6578\u8f03\u591a\u7684\u6642\u5019\uff0c\u4f7f\u7528\u96a8\u6a5f\u641c\u7d22\u53ef\u4ee5\u964d\u4f4e\u641c\u7d22\u6642\u9593\uff0c\u540c\u6642\u53c8\u80fd\u78ba\u4fdd\u4e00\u5b9a\u7684\u6a21\u578b\u6e96\u78ba\u6027\u3002\u5728 Sklearn \u5957\u4ef6\u4e2d\u4e5f\u6709\u63d0\u4f9b RandomizedSearchCV \u65b9\u6cd5\u53ef\u4ee5\u547c\u53eb\uff0c\u8207\u7db2\u683c\u641c\u7d22\u7684\u5dee\u5225\u5728\u65bc\u4f7f\u7528\u8005\u53ef\u4ee5\u5c07\u6b32\u641c\u5c0b\u7684\u8d85\u53c3\u6578\u8a2d\u5b9a\u4e00\u500b\u671f\u671b\u7684\u7bc4\u570d\u3002\u8a72\u65b9\u6cd5\u6703\u5728\u6b64\u7bc4\u570d\u4e2d\u96a8\u6a5f\u62bd\u4e00\u500b\u6578\u503c\u4e26\u9032\u884c\u6a21\u578b\u8a13\u7df4\u4e26\u9a57\u8b49\u6a21\u578b\u3002\u4e26\u627e\u51fa\u6240\u6709\u96a8\u6a5f\u7d44\u5408\u4e2d\u8868\u73fe\u6700\u597d\u7684\u4e00\u7d44\u8d85\u53c3\u6578\u3002 from sklearn.datasets import load_iris from sklearn.linear_model import LogisticRegression from sklearn.model_selection import RandomizedSearchCV from scipy.stats import uniform # \u8f09\u5165\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6 iris = load_iris () # \u5efa\u7acb\u908f\u8f2f\u8ff4\u6b78\u6a21\u578b logistic = LogisticRegression ( solver = 'saga' , tol = 1e-2 , max_iter = 200 , random_state = 0 ) # \u8a2d\u5b9a\u6b32\u641c\u5c0b\u7684\u8d85\u53c3\u6578\u4e26\u7d66\u4e88\u4e00\u500b\u671f\u671b\u7684\u7bc4\u570d distributions = dict ( C = uniform ( loc = 0 , scale = 4 ), penalty = [ 'l2' , 'l1' ]) # \u96a8\u6a5f\u641c\u7d22\u9810\u8a2d n_iter=10 clf = RandomizedSearchCV ( logistic , distributions , random_state = 0 , n_iter = 10 ) # 
\u64ec\u5408\u6578\u64da\u4e26\u56de\u50b3\u6700\u4f73\u6a21\u578b search = clf . fit ( iris . data , iris . target ) search . best_params_","title":"Random Search"},{"location":"19.AutoML/#bayesian-optimization","text":"Bayesian Optimization (\u8c9d\u8449\u65af\u512a\u5316) \u76ee\u6a19\u662f\u8981\u5728\u6700\u5c11\u7684\u8a66\u9a57\u4e0b\u5c0b\u627e\u4e00\u7d44\u6700\u4f73\u7684\u8d85\u53c3\u6578\u4f7f\u5f97\u932f\u8aa4\u7387\u80fd\u5920\u8d8a\u4f4e\u8d8a\u597d\u3002\u7531\u65bc\u6211\u5011\u6240\u6536\u96c6\u5230\u7684\u8cc7\u6599\u7121\u5f9e\u5f97\u77e5\u8a72\u6a21\u578b\u7684\u76ee\u6a19\u51fd\u6578\u662f\u9577\u600e\u6a23\uff0c\u56e0\u6b64\u6a5f\u5668\u5b78\u7fd2\u7684\u76ee\u7684\u5c31\u662f\u8981\u5f9e\u9019\u4e9b\u8cc7\u6599\u4e2d\u53bb\u64ec\u5408\u4e00\u500b\u51fd\u6578\uff0c\u76ee\u6a19\u662f\u7d66\u4e88\u4e00\u7b46\u8f38\u5165 X \u8a72\u51fd\u6578\u7684\u8f38\u51fa\u8981\u8207\u771f\u5be6\u7684\u7b54\u6848\u8d8a\u63a5\u8fd1\u8d8a\u597d\u3002\u900f\u904e\u4ee3\u7406\u512a\u5316 (surrogate optimization) \u4f7f\u7528\u4e00\u500b\u4ee3\u7406\u51fd\u6578\u4f86\u4f30\u8a08\u76ee\u6a19\u51fd\u6578\u3002\u7c21\u55ae\u4f86\u8aaa\u4ee3\u7406\u51fd\u6578\u662f\u6307\u76ee\u6a19\u51fd\u6578\u7684\u4e00\u7a2e\u8fd1\u4f3c\uff0c\u6b64\u5916\u4ee3\u7406\u51fd\u6578\u53ef\u57fa\u65bc\u53d6\u6a23\u5f97\u5230\u7684\u8cc7\u6599\u9ede\u88ab\u69cb\u5efa\u51fa\u4f86\u3002 
\u4ee3\u7406\u51fd\u6578\u7684\u76ee\u7684\u662f\u5728\u7d66\u5b9a\u4e00\u7d44\u7279\u5b9a\u7684\u5019\u9078\u8d85\u53c3\u6578\u7684\u60c5\u6cc1\u4e0b\u5feb\u901f\u4f30\u8a08\u5be6\u969b\u6a21\u578b\u7684\u932f\u8aa4\u7387\u3002\u900f\u904e\u9019\u7a2e\u65b9\u5f0f\u53ef\u4ee5\u5feb\u901f\u6c7a\u5b9a\u8a72\u7d44\u8d85\u53c3\u6578\u662f\u5426\u53ef\u4ee5\u88ab\u62ff\u4f86\u5be6\u969b\u8a13\u7df4\u6a21\u578b\u3002\u96a8\u8457\u8a66\u9a57\u6b21\u6578\u7684\u589e\u52a0\uff0c\u4ee3\u7406\u51fd\u6578\u96a8\u8457\u5148\u524d\u7684\u8a66\u9a57\u7d50\u679c\u800c\u66f4\u65b0\u6539\u9032\uff0c\u4e26\u958b\u59cb\u63a8\u85a6\u66f4\u597d\u7684\u5019\u9078\u8d85\u53c3\u6578\u3002 Auto-sklearn \u5c31\u662f\u4e00\u500b\u900f\u904e\u8c9d\u8449\u65af\u512a\u5316\u4f86\u5c0b\u627e\u6700\u4f73\u8d85\u53c3\u6578\u7684\u4e00\u500b\u5de5\u5177\u3002\u540c\u6642\u5b83\u4e5f\u80fd\u641c\u7d22\u5728 Sklearn \u4e2d\u6240\u6709\u53ef\u80fd\u7684\u7b97\u6cd5\uff0c\u4e26\u70ba\u4f60\u63a8\u85a6\u4e00\u500b\u5408\u9069\u7684\u6a21\u578b\u8207\u8cc7\u6599\u524d\u8655\u7406\u65b9\u5f0f\u3002\u660e\u5929\u6211\u5011\u5c31\u4f86\u4e00\u63a2\u7a76\u7adf\u8a72\u5957\u4ef6\u80cc\u5f8c\u7684\u795e\u79d8\u539f\u7406\u4ee5\u53ca\u7a0b\u5f0f\u5be6\u4f5c\u5427\uff01","title":"Bayesian Optimization"},{"location":"19.AutoML/#reference","text":"automl.org Sklearn \u5b98\u65b9\u6587\u4ef6 GridSearchCV Sklearn \u5b98\u65b9\u6587\u4ef6 RandomizedSearchCV \u7528\u7c21\u55ae\u8853\u8a9e\u8b93\u4f60\u770b\u5230\u8c9d\u8449\u65af\u512a\u5316\u4e4b\u7f8e \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/","text":"[Day 2] \u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u4eba\u5de5\u667a\u6167\u7684\u6f14\u9032 \u4eba\u5de5\u667a\u6167\u7684\u5206\u7d1a \u6a5f\u5668\u662f\u5982\u4f55\u5b78\u7fd2\u7684 
\u4eba\u5de5\u667a\u6167\u7684\u6f14\u9032 AI \u8207\u6a5f\u5668\u5b78\u7fd2\u6280\u8853\u6b63\u5728\u84ec\u52c3\u767c\u5c55\u4e2d\uff0c\u4f60\u80fd\u60f3\u50cf\u4eba\u5de5\u667a\u6167\u66fe\u88ab\u8a8d\u70ba\u662f\u4e00\u500b\u6beb\u7121\u51fa\u8def\u7684\u9818\u57df\u55ce? \u5f9e\u4eba\u5de5\u667a\u6167\u7684\u6642\u9593\u8ef8\u4f86\u770b\u53ef\u4ee5\u5206\u70ba\u4e09\u500b\u71b1\u6f6e\u3002\u7b2c\u4e00\u6b21\u71b1\u6f6e\uff081950~1960\u5e74\uff09\uff0c\u7531\u65bc\u65e9\u671f\u7684\u96fb\u8166\u786c\u9ad4\u8cc7\u6e90\u7684\u4e0d\u8db3\u5c0e\u81f4\u8907\u96dc\u7684\u554f\u984c\u7121\u6cd5\u8f15\u6613\u7684\u89e3\u6c7a\u3002\u7b2c\u4e8c\u6b21\u71b1\u6f6e\uff081980~1990\u5e74\uff09\u5c07\u5e36\u6709\u77e5\u8b58\u672c\u9ad4\u7684\u4ee3\u7406\u4eba\u653e\u5165\u6a5f\u5668\u4eba\u4e2d\u4f7f\u5177\u6709\u667a\u6167\uff0c\u4e5f\u5c31\u662f\u6240\u8b02\u7684\u5c08\u5bb6\u7cfb\u7d71\u3002\u4f46\u4eba\u985e\u8cc7\u6e90\u6709\u9650\u4e0d\u53ef\u80fd\u628a\u6240\u6709\u7684\u77e5\u8b58\u90fd\u9010\u4e00\u5730\u8f38\u5165\u5230\u96fb\u8166\u3002\u56e0\u6b64\u5927\u5bb6\u958b\u59cb\u601d\u8003\u6a5f\u5668\u662f\u5426\u80fd\u5920\u8b93\u4ed6\u81ea\u5df1\u53bb\u5b78\u7fd2\uff1f\u800c\u4e0d\u662f\u4eba\u985e\u4e00\u5473\u7684\u9935\u5165\u9019\u4e9b\u77e5\u8b58\u3002\u7b2c\u4e09\u6b21\u71b1\u6f6e\uff082000\u5e74~\u73fe\u5728\uff09\u7531\u65bc CPU\u3001GPU \u4ee5\u53ca\u96f2\u7aef\u904b\u7b97\u8cc7\u6e90\u666e\u53ca\uff0c\u65e9\u671f\u8907\u96dc\u96e3\u89e3\u7684\u6f14\u7b97\u6cd5\u9678\u7e8c\u53ef\u4ee5\u900f\u904e\u8d85\u7d1a\u96fb\u8166\u4f86\u89e3\u6c7a\u3002\u7576\u624b\u908a\u6709\u4e86\u5927\u91cf\u7684\u6578\u64da\u5c31\u80fd\u62ff\u4f86\u6a5f\u5668\u5b78\u7fd2\uff0c\u56e0\u6b64\u5927\u5bb6\u8e0f\u5165\u4e86\u5927\u6578\u64da\u4ee5\u53ca\u6df1\u5ea6\u5b78\u7fd2\u7684\u6642\u4ee3\u3002\u6642\u9593\u4e0d\u65b7\u7684\u5f80\u524d\u8d70\uff0c\u4f60\u80fd\u60f3\u50cf\u672a\u4f86\u7684 AI \u5728\u4e16\u754c\u4e0a\u662f\u626e\u6f14\u4ec0\u9ebc\u6a23\u7684\u89d2\u8272\u55ce\uff1f 
\u4eba\u5de5\u667a\u6167\u7684\u5206\u7d1a \u73fe\u4eca\u4eba\u5de5\u667a\u6167\u8207\u6211\u5011\u751f\u6d3b\u7121\u6240\u4e0d\u5728\uff0c\u4f8b\u5982\u6211\u5011\u53ea\u8981\u5c0d\u8457\u624b\u6a5f\u558a\u4e00\u8072\u300cHey Siri !\u300d\u860b\u679c\u624b\u6a5f\u7684\u8a9e\u97f3\u52a9\u7406\u5c31\u80fd\u5e6b\u4f60\u6253\u7406\u597d\u5927\u5c0f\u4e8b\u3002\u6216\u8005\u6b63\u5728\u8d85\u5e02\u8cfc\u7269\u7684\u4f60\u6b63\u5728\u70ba\u8cfc\u8cb7\u54ea\u4e00\u9805\u5546\u54c1\u7169\u60f1\u6642\uff0c\u63a8\u85a6\u7cfb\u7d71\u6a5f\u5668\u4eba\u80fd\u5920\u5373\u6642\u5730\u70ba\u4f60\u505a\u5546\u54c1\u63a8\u85a6\u3002\u770b\u4f3c\u8457\u7c21\u55ae\u7684\u52d5\u4f5c\uff0c\u4f46\u4eba\u5de5\u667a\u6167\u7684\u60c5\u666f\u5728\u4f60\u6211\u65e5\u5e38\u751f\u6d3b\u4e2d\u606f\u606f\u76f8\u95dc\u3002\u4eba\u5de5\u667a\u6167\u4f9d\u7167\u6a5f\u5668\u80fd\u5920\u8655\u7406\u8207\u5224\u65b7\u7684\u80fd\u529b\u5340\u5206\u70ba\u56db\u500b\u5206\u7d1a\uff0c\u5206\u5225\u70ba\u81ea\u52d5\u63a7\u5236\u3001\u63a2\u7d22\u63a8\u8ad6\u3001\u6a5f\u5668\u5b78\u7fd2\u3001\u6df1\u5ea6\u5b78\u7fd2\uff1a \u7b2c\u4e00\u7d1a\u4eba\u5de5\u667a\u6167\uff1a\u81ea\u52d5\u63a7\u5236 
\u6a5f\u5668\u542b\u6709\u81ea\u52d5\u63a7\u5236\u7684\u529f\u80fd\uff0c\u4e26\u4e14\u7d93\u7531\u611f\u6e2c\u5668\u5075\u6e2c\u74b0\u5883\u7684\u8cc7\u8a0a\u3002\u4f8b\u5982\u900f\u904e\u6eab\u5ea6\u611f\u6e2c\u5668\u4f86\u5075\u6e2c\u7522\u7dda\u7684\u99ac\u9054\u662f\u5426\u904e\u71b1\uff0c\u4e26\u9054\u5230\u505c\u6b62\u904b\u8f49\u6548\u679c\u3002\u6216\u662f\u51b7\u6c23\u4f4e\u65bc20\u5ea6\u6642\u5c31\u9032\u5165\u5f85\u6a5f\u6a21\u5f0f\u2026\u2026\u7b49\u3002\u56e0\u6b64\u7a0b\u5f0f\u8a2d\u8a08\u5e2b\u5fc5\u9808\u5148\u628a\u6240\u6709\u53ef\u80fd\u7684\u60c5\u6cc1\u90fd\u8003\u616e\u9032\u53bb\u624d\u80fd\u5beb\u51fa\u63a7\u5236\u7a0b\u5f0f\u3002\u9019\u5c31\u884d\u4f38\u51fa\u4e00\u4e9b\u554f\u984c\uff0c\u50cf\u662f\u9748\u6d3b\u5ea6\u4e0d\u9ad8\uff0c\u4e14\u9700\u8981\u6709\u7d93\u9a57\u7684\u5c08\u5bb6\u4ecb\u5165\u624d\u80fd\u5b8c\u6210\u3002 \u7b2c\u4e8c\u7d1a\u4eba\u5de5\u667a\u6167\uff1a\u63a2\u7d22\u63a8\u8ad6 \u7b2c\u4e8c\u7d1a\u9010\u6f38\u958b\u59cb\u5f37\u8abf\u908f\u8f2f\u63a8\u7406\uff0c\u53ef\u4ee5\u8aaa\u662f\u88dc\u8db3\u7b2c\u4e00\u7d1a\u7684\u554f\u984c\u3002\u900f\u904e\u5c07\u77e5\u8b58\u7d44\u7e54\u6210\u77e5\u8b58\u672c\u9ad4\u4e26\u8b93\u6a5f\u5668\u5f9e\u73fe\u6709\u7684\u8cc7\u8a0a\u4e2d\u53bb\u63a8\u7406\u3002\u5178\u578b\u7684\u4f8b\u5b50\u5c31\u662f\u5c08\u5bb6\u7cfb\u7d71\uff0c\u5b83\u662f\u900f\u904e\u7279\u5b9a\u9818\u57df\u7684\u5c08\u5bb6\u8a02\u5b9a\u51fa\u4e00\u5957\u77e5\u8b58\u5eab\u8207\u898f\u5247\u5eab\uff0c\u4e26\u7522\u751f\u5927\u91cf\u8f38\u5165\u8207\u8f38\u51fa\u8cc7\u6599\u7684\u6392\u5217\u7d44\u5408\u4f86\u89e3\u6c7a\u65e5\u5e38\u751f\u6d3b\u4e2d\u7684\u554f\u984c\u3002\u7576\u7136\u6240\u8b02\u7684\u5c08\u5bb6\u7cfb\u7d71\u5c31\u5fc5\u9808\u9080\u8acb\u9818\u57df\u7684\u5c08\u5bb6\u70ba\u7cfb\u7d71\u91cf\u8eab\u6253\u9020\u4e00\u5957\u7368\u4e00\u7121\u4e8c\u7684\u898f\u5247\u3002\u7136\u800c\u6bcf\u500b\u4eba\u7684\u89c0\u9ede\u53ef\u80fd\u90fd\u4e0d\u540c\uff0c\u56e0\u6b64\u4e0d\u540c\u5c08\u5bb6\u9593\u62
40\u5236\u5b9a\u7684\u898f\u5247\u53ef\u80fd\u90fd\u4e0d\u592a\u4e00\u6a23\u3002 \u7b2c\u4e09\u7d1a\u4eba\u5de5\u667a\u6167\uff1a\u6a5f\u5668\u5b78\u7fd2 \u6a5f\u5668\u53ef\u4ee5\u6839\u64da\u8cc7\u6599\u5b78\u7fd2\u5982\u4f55\u5c07\u8f38\u5165\u8207\u8f38\u51fa\u8cc7\u6599\u7522\u751f\u95dc\u806f\u3002\u6a5f\u5668\u5b78\u7fd2\u662f\u4e00\u7a2e\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\uff0c\u4e26\u5f9e\u8cc7\u6599\u4e2d\u53bb\u5b78\u7fd2\u4e26\u627e\u51fa\u554f\u984c\u7684\u89e3\u6c7a\u65b9\u6cd5\u3002\u5176\u61c9\u7528\u5305\u62ec\u641c\u5c0b\u5f15\u64ce\u3001\u5927\u6578\u64da\u5206\u6790\u7b49\u3002\u6211\u5011\u4f9d\u64da\u8cc7\u6599\u8207\u5b78\u7fd2\u65b9\u5f0f\u53ef\u5927\u81f4\u5206\u70ba\u76e3\u7763\u5f0f\u5b78\u7fd2\u3001\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u3001\u589e\u5f37\u5f0f\u5b78\u7fd2\uff0c\u6b64\u5916\u81ea\u76e3\u7763\u5b78\u7fd2\u9019\u500b\u540d\u8a5e\u6700\u8fd1\u4e5f\u71b1\u70c8\u7684\u8a0e\u8ad6\u4e2d\u3002 \u7b2c\u56db\u7d1a\u4eba\u5de5\u667a\u6167\uff1a\u6df1\u5ea6\u5b78\u7fd2 \u6df1\u5ea6\u5b78\u7fd2\u662f\u4e00\u7a2e\u6a5f\u5668\u5b78\u7fd2\u7684\u65b9\u6cd5\u3002\u5b83\u85c9\u7531\u6a21\u4eff\u4eba\u985e\u5927\u8166\u795e\u7d93\u5143\u7684\u7d50\u69cb\uff0c\u5b9a\u7fa9\u89e3\u6c7a\u554f\u984c\u7684\u51fd\u5f0f\u3002\u6240\u8b02\u6df1\u5ea6\u5b78\u7fd2\u662f\u4e00\u7a2e\u5177\u6709\u6df1\u5ea6\u591a\u5c64\u7684\u795e\u7d93\u7db2\u8def\u3002\u6a5f\u5668\u53ef\u4ee5\u81ea\u884c\u5b78\u7fd2\u4e26\u4e14\u7406\u89e3\u6a5f\u5668\u5b78\u7fd2\u6642\u7528\u4ee5\u8868\u793a\u8cc7\u6599\u7684\u300c\u7279\u5fb5\u300d\uff0c\u56e0\u6b64\u53c8\u7a31\u70ba\u300c\u7279\u5fb5\u8868\u9054\u5b78\u7fd2\u300d\uff0c\u5176\u61c9\u7528\u5305\u62ec\uff1a\u5f71\u50cf\u5206\u985e\u3001\u6a5f\u5668\u7ffb\u8b6f...\u7b49\u3002 \u6a5f\u5668\u5982\u4f55\u5b78\u7fd2\uff1f \u76e3\u7763\u5f0f\u5b78\u7fd2 (Supervised Learning) 
\u6240\u8b02\u7684\u76e3\u7763\u5f0f\u5b78\u7fd2\u662f\u7d66\u8a31\u591a\u8cc7\u6599\u4e26\u7d66\u8207\u7b54\u6848\uff0c\u900f\u904e\u640d\u5931\u51fd\u6578\u8a08\u7b97\u4f86\u627e\u51fa\u4e00\u500b\u6700\u4f73\u89e3\u3002\u8209\u4e00\u500b\u7c21\u55ae\u7684\u4f8b\u5b50\uff0c\u6bd4\u5982\u7d66\u6a5f\u5668\u5404\u770b\u4e86 1000 \u5f35\u8c93\u548c\u72d7\u7684\u7167\u7247\u5f8c\u518d\u8a62\u554f\u6a5f\u5668\u65b0\u7684\u4e00\u5f35\u7167\u7247\u4e2d\u662f\u8c93\u9084\u662f\u72d7\u3002\u4e00\u76f4\u4e0d\u65b7\u7684\u8fed\u4ee3\u8a13\u7df4\u4e26\u5f9e\u932f\u8aa4\u4e2d\u53bb\u5b78\u7fd2\uff0c\u6700\u7d42\u6a5f\u5668\u80fd\u6210\u529f\u7684\u5206\u985e\u4e86\u3002 \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2 (Unsupervised Learning) \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u53ea\u7d66\u5b9a\u7279\u5fb5\uff0c\u6a5f\u5668\u6703\u60f3\u8fa6\u6cd5\u6703\u5f9e\u4e2d\u627e\u51fa\u898f\u5f8b\u3002\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u6700\u5e38\u898b\u7684\u65b9\u6cd5\u5c31\u662f\u96c6\u7fa4\u5206\u6790(Cluster Analysis)\uff0c\u76ee\u6a19\u662f\u6839\u64da\u7279\u5fb5\u5c07\u8cc7\u6599\u6a23\u672c\u5206\u70ba\u5e7e\u7fa4\u3002\u7c21\u55ae\u4f86\u8aaa\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u5c31\u662f\u7d66\u8a31\u591a\u8cc7\u6599\u4f46\u4e0d\u7d66\u4e88\u7b54\u6848\uff0c\u6a21\u578b\u6703\u5f9e\u8cc7\u6599\u4e2d\u81ea\u5df1\u53bb\u627e\u51fa\u95dc\u4fc2\u3002\u900f\u904e\u5206\u7fa4\u6f14\u7b97\u6cd5\u4f86\u8a08\u7b97\u8cc7\u6599\u8207\u8cc7\u6599\u9593\u7684\u76f8\u4f3c\u7a0b\u5ea6\u8207\u8ddd\u96e2\u3002 \u534a\u76e3\u7763\u5f0f\u5b78\u7fd2 (Semi-Supervised Learning) 
\u4ecb\u65bc\u76e3\u7763\u5f0f\u5b78\u7fd2\u8207\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u4e4b\u9593\u3002\u5728\u73fe\u5be6\u751f\u6d3b\u4e2d\uff0c\u672a\u6a19\u8a18\u6a23\u672c\u591a\u3001\u6709\u6a19\u8a18\u6a23\u672c\u5c11\u662f\u4e00\u500b\u6bd4\u8f03\u666e\u904d\u73fe\u8c61\uff0c\u5982\u4f55\u5229\u7528\u597d\u672a\u6a19\u8a18\u6a23\u672c\u4f86\u63d0\u5347\u6a21\u578b\u6cdb\u5316\u80fd\u529b\uff0c\u5c31\u662f\u534a\u76e3\u7763\u5f0f\u5b78\u7fd2\u7814\u7a76\u7684\u91cd\u9ede\u3002\u534a\u76e3\u7763\u5f0f\u5b78\u7fd2\u7684\u61c9\u7528\u4e3b\u8981\u5728\u65bc\u6536\u96c6\u8cc7\u6599\u5f88\u7c21\u55ae\uff0c\u4f46\u6a19\u8a18\u7684\u8cc7\u6599\u592a\u5c11\u4e86\uff0c\u6211\u5011\u5e0c\u671b\u53ef\u4ee5\u81ea\u52d5\u6a19\u8a18\u8cc7\u6599\u3002 \u5f37\u5316\u5f0f\u5b78\u7fd2 (Reinforcement Learning) \u5728\u5f37\u5316\u5f0f\u5b78\u7fd2\u4e2d\u6a5f\u5668\u6703\u9032\u884c\u4e00\u7cfb\u5217\u7684\u52d5\u4f5c\uff0c\u800c\u6bcf\u505a\u4e00\u500b\u52d5\u4f5c\u3001\u74b0\u5883\u90fd\u6703\u8ddf\u8457\u767c\u751f\u8b8a\u5316\u3002\u82e5\u74b0\u5883\u7684\u8b8a\u5316\u662f\u96e2\u76ee\u6a19\u66f4\u63a5\u8fd1\uff0c\u6211\u5011\u5c31\u6703\u7d66\u4e88\u4e00\u500b\u6b63\u5411\u53cd\u994b\u3002\u82e5\u96e2\u76ee\u6a19\u66f4\u9060\uff0c\u5247\u7d66\u4e88\u8ca0\u5411\u53cd\u994b\u3002\u6a5f\u5668\u900f\u904e\u4e0d\u65b7\u7684\u5f9e\u932f\u8aa4\u4e2d\u53bb\u5b78\u7fd2\uff0c\u6700\u7d42\u5b78\u5230\u4e86\u5982\u4f55\u53bb\u89e3\u6c7a\u4e00\u4ef6\u4e8b\u60c5\u3002 \u81ea\u76e3\u7763\u5b78\u7fd2 (Self-Supervised Learning) \u81ea\u76e3\u7763\u5b78\u7fd2\u662f\u7531\u5377\u7a4d\u795e\u7d93\u4e4b\u7236 Yann LeCun \u65bc 2019 
\u5e74\u6240\u63d0\u51fa\u4f86\u7684\u4e00\u7a2e\u5b78\u7fd2\u6a5f\u5236\u3002\u6b64\u5b78\u7fd2\u6a5f\u5236\u6a21\u4eff\u4eba\u985e\u7684\u5b78\u7fd2\u884c\u70ba\uff0c\u900f\u904e\u7576\u524d\u4efb\u52d9\u89c0\u5bdf\u6240\u5f97\u5230\u7684\u7279\u5fb5\uff0c\u4e26\u8a13\u7df4\u4e00\u500b\u76ee\u6a19\u4efb\u52d9\u7684\u6a21\u578b\u3002\u800c\u4e14\u5b78\u7fd2\u904e\u7a0b\u4e2d\u4e26\u4e0d\u4ef0\u8cf4\u4eba\u985e\u7d66\u5b9a\u7684\u6a19\u7c64\u3002\u7c21\u55ae\u4f86\u8aaa\u8a13\u7df4\u904e\u7a0b\u662f\u62ff\u4e00\u500b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u900f\u904e\u975e\u76e3\u7763\u5f0f\u6280\u5de7 pre-text task \u8a13\u7df4\u597d\u6a21\u578b\uff0c\u8a13\u7df4\u5b8c\u6210\u5f8c\u518d\u63a5\u5230\u4e0b\u6e38\u4efb\u52d9\u505a\u6700\u5f8c\u7684\u6a21\u578b\u5fae\u8abf (fine tune)\u3002 \u5b78 AI \u8a72\u7528\u54ea\u7a2e\u7a0b\u5f0f\u8a9e\u8a00\uff1f Python \u662f\u8fd1\u5e74\u4f86\u9ad8\u901f\u6210\u9577\u4e26\u4e14\u9010\u6f38\u666e\u53ca\u7684\u7a0b\u5f0f\u8a9e\u8a00\uff0c\u4e5f\u53ef\u4ee5\u8aaa\u662f\u6700\u5bb9\u6613\u4e0a\u624b\u7684\u7a0b\u5f0f\u8a9e\u8a00\u4e4b\u4e00\u3002\u4e3b\u8981\u5728\u65bc\u5b83\u7684\u8a9e\u6cd5\u662f\u7c21\u5316\u800c\u4e0d\u8907\u96dc\u7684\uff0c\u540c\u6642\u5f37\u8abf\u7a0b\u5f0f\u78bc\u7684\u53ef\u8b80\u6027\u56e0\u6b64\u66f4\u80fd\u8cbc\u8fd1\u7a0b\u5f0f\u8a2d\u8a08\u8005\u7684\u601d\u7dad\u3002\u7576\u7136\u4e5f\u6709\u4e9b\u4eba\u4f7f\u7528 R \u8a9e\u8a00\u9032\u884c\u7d71\u8a08\u5206\u6790\u3001\u7e6a\u5716\u4ee5\u53ca\u8cc7\u6599\u63a2\u52d8\u751a\u81f3\u5efa\u6a21\u3002\u5982\u679c\u4f60\u6b63\u5728\u7336\u8c6b\u8981\u5165\u5751\u54ea\u4e00\u985e\u7a0b\u5f0f\u8a9e\u8a00\uff0c\u7b46\u8005\u9019\u88e1\u63a8\u85a6 Python \u7a0b\u5f0f\u8a9e\u8a00\u3002\u56e0\u70ba Python \u4e0d\u50c5\u53ef\u4ee5\u9032\u884c\u8cc7\u6599\u5206\u6790\u3001\u6a5f\u5668\u5b78\u7fd2\u4e5f\u80fd\u9032\u884c\u524d/\u5f8c\u7aef\u958b\u767c\u3002\u53e6\u5916 Python 
\u6709\u8c50\u5bcc\u7684\u8a0e\u8ad6\u793e\u7fa4\u4ee5\u53ca\u8a31\u591a\u958b\u6e90\u5957\u4ef6\u652f\u63f4\uff0c\u5927\u5e45\u7684\u964d\u4f4e\u5b78\u7fd2\u9580\u6abb\u3002 \u8aaa\u4e86\u9019\u9ebc\u591a\uff01\u5927\u5bb6\u6e96\u5099\u597d\u4e86\u55ce\uff1f\u5feb\u6e96\u5099\u597d\u96fb\u8166\u8207\u7b46\u8a18\u672c\uff0c\u597d\u597d\u7684\u70ba\u81ea\u5df1\u9032\u884c\u4e09\u5341\u5929\u7684\u5145\u96fb\u5427\uff5e Let's Go! \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 2] \u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#day-2-ai","text":"","title":"[Day 2] \u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#_1","text":"\u4eba\u5de5\u667a\u6167\u7684\u6f14\u9032 \u4eba\u5de5\u667a\u6167\u7684\u5206\u7d1a \u6a5f\u5668\u662f\u5982\u4f55\u5b78\u7fd2\u7684","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#_2","text":"AI \u8207\u6a5f\u5668\u5b78\u7fd2\u6280\u8853\u6b63\u5728\u84ec\u52c3\u767c\u5c55\u4e2d\uff0c\u4f60\u80fd\u60f3\u50cf\u4eba\u5de5\u667a\u6167\u66fe\u88ab\u8a8d\u70ba\u662f\u4e00\u500b\u6beb\u7121\u51fa\u8def\u7684\u9818\u57df\u55ce? 
\u5f9e\u4eba\u5de5\u667a\u6167\u7684\u6642\u9593\u8ef8\u4f86\u770b\u53ef\u4ee5\u5206\u70ba\u4e09\u500b\u71b1\u6f6e\u3002\u7b2c\u4e00\u6b21\u71b1\u6f6e\uff081950~1960\u5e74\uff09\uff0c\u7531\u65bc\u65e9\u671f\u7684\u96fb\u8166\u786c\u9ad4\u8cc7\u6e90\u7684\u4e0d\u8db3\u5c0e\u81f4\u8907\u96dc\u7684\u554f\u984c\u7121\u6cd5\u8f15\u6613\u7684\u89e3\u6c7a\u3002\u7b2c\u4e8c\u6b21\u71b1\u6f6e\uff081980~1990\u5e74\uff09\u5c07\u5e36\u6709\u77e5\u8b58\u672c\u9ad4\u7684\u4ee3\u7406\u4eba\u653e\u5165\u6a5f\u5668\u4eba\u4e2d\u4f7f\u5177\u6709\u667a\u6167\uff0c\u4e5f\u5c31\u662f\u6240\u8b02\u7684\u5c08\u5bb6\u7cfb\u7d71\u3002\u4f46\u4eba\u985e\u8cc7\u6e90\u6709\u9650\u4e0d\u53ef\u80fd\u628a\u6240\u6709\u7684\u77e5\u8b58\u90fd\u9010\u4e00\u5730\u8f38\u5165\u5230\u96fb\u8166\u3002\u56e0\u6b64\u5927\u5bb6\u958b\u59cb\u601d\u8003\u6a5f\u5668\u662f\u5426\u80fd\u5920\u8b93\u4ed6\u81ea\u5df1\u53bb\u5b78\u7fd2\uff1f\u800c\u4e0d\u662f\u4eba\u985e\u4e00\u5473\u7684\u9935\u5165\u9019\u4e9b\u77e5\u8b58\u3002\u7b2c\u4e09\u6b21\u71b1\u6f6e\uff082000\u5e74~\u73fe\u5728\uff09\u7531\u65bc CPU\u3001GPU \u4ee5\u53ca\u96f2\u7aef\u904b\u7b97\u8cc7\u6e90\u666e\u53ca\uff0c\u65e9\u671f\u8907\u96dc\u96e3\u89e3\u7684\u6f14\u7b97\u6cd5\u9678\u7e8c\u53ef\u4ee5\u900f\u904e\u8d85\u7d1a\u96fb\u8166\u4f86\u89e3\u6c7a\u3002\u7576\u624b\u908a\u6709\u4e86\u5927\u91cf\u7684\u6578\u64da\u5c31\u80fd\u62ff\u4f86\u6a5f\u5668\u5b78\u7fd2\uff0c\u56e0\u6b64\u5927\u5bb6\u8e0f\u5165\u4e86\u5927\u6578\u64da\u4ee5\u53ca\u6df1\u5ea6\u5b78\u7fd2\u7684\u6642\u4ee3\u3002\u6642\u9593\u4e0d\u65b7\u7684\u5f80\u524d\u8d70\uff0c\u4f60\u80fd\u60f3\u50cf\u672a\u4f86\u7684 AI 
\u5728\u4e16\u754c\u4e0a\u662f\u626e\u6f14\u4ec0\u9ebc\u6a23\u7684\u89d2\u8272\u55ce\uff1f","title":"\u4eba\u5de5\u667a\u6167\u7684\u6f14\u9032"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#_3","text":"\u73fe\u4eca\u4eba\u5de5\u667a\u6167\u8207\u6211\u5011\u751f\u6d3b\u7121\u6240\u4e0d\u5728\uff0c\u4f8b\u5982\u6211\u5011\u53ea\u8981\u5c0d\u8457\u624b\u6a5f\u558a\u4e00\u8072\u300cHey Siri !\u300d\u860b\u679c\u624b\u6a5f\u7684\u8a9e\u97f3\u52a9\u7406\u5c31\u80fd\u5e6b\u4f60\u6253\u7406\u597d\u5927\u5c0f\u4e8b\u3002\u6216\u8005\u6b63\u5728\u8d85\u5e02\u8cfc\u7269\u7684\u4f60\u6b63\u5728\u70ba\u8cfc\u8cb7\u54ea\u4e00\u9805\u5546\u54c1\u7169\u60f1\u6642\uff0c\u63a8\u85a6\u7cfb\u7d71\u6a5f\u5668\u4eba\u80fd\u5920\u5373\u6642\u5730\u70ba\u4f60\u505a\u5546\u54c1\u63a8\u85a6\u3002\u770b\u4f3c\u8457\u7c21\u55ae\u7684\u52d5\u4f5c\uff0c\u4f46\u4eba\u5de5\u667a\u6167\u7684\u60c5\u666f\u5728\u4f60\u6211\u65e5\u5e38\u751f\u6d3b\u4e2d\u606f\u606f\u76f8\u95dc\u3002\u4eba\u5de5\u667a\u6167\u4f9d\u7167\u6a5f\u5668\u80fd\u5920\u8655\u7406\u8207\u5224\u65b7\u7684\u80fd\u529b\u5340\u5206\u70ba\u56db\u500b\u5206\u7d1a\uff0c\u5206\u5225\u70ba\u81ea\u52d5\u63a7\u5236\u3001\u63a2\u7d22\u63a8\u8ad6\u3001\u6a5f\u5668\u5b78\u7fd2\u3001\u6df1\u5ea6\u5b78\u7fd2\uff1a","title":"\u4eba\u5de5\u667a\u6167\u7684\u5206\u7d1a"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#_4","text":"\u6a5f\u5668\u542b\u6709\u81ea\u52d5\u63a7\u5236\u7684\u529f\u80fd\uff0c\u4e26\u4e14\u7d93\u7531\u611f\u6e2c\u5668\u5075\u6e2c\u74b0\u5883\u7684\u8cc7\u8a0a\u3002\u4f8b\u5982\u900f\u904e\u6eab\u5ea6\u611f\u6e2c\u5668\u4f86\u5075\u6e2c\u7522\u7dda\u7684\u99ac\u9054\u662f\u5426\u904e\u71b1\uff0c\u4e26\u9054\u5230\u505c\u6b62\u904b\u8f49\u6548\u679c\u3002\u6216\u662f\u51b7\u6c23\u4f4e\u65bc20\u5ea6\u6642\u5c31\u9032\u5165\u5f85\u6a5f\u6a21\u5f0f\u2026\u2026\u7b49\u3002\u56e0\u6b64\u7a0b\u5f0f\u8a2d\u8a08\u5e2b\u5fc5\u9808\u5148\u628a\u6240\u6709\u53ef\u80fd\u7684\u60c5\u6cc1\u90fd\u8003\u616e\u
9032\u53bb\u624d\u80fd\u5beb\u51fa\u63a7\u5236\u7a0b\u5f0f\u3002\u9019\u5c31\u884d\u4f38\u51fa\u4e00\u4e9b\u554f\u984c\uff0c\u50cf\u662f\u9748\u6d3b\u5ea6\u4e0d\u9ad8\uff0c\u4e14\u9700\u8981\u6709\u7d93\u9a57\u7684\u5c08\u5bb6\u4ecb\u5165\u624d\u80fd\u5b8c\u6210\u3002","title":"\u7b2c\u4e00\u7d1a\u4eba\u5de5\u667a\u6167\uff1a\u81ea\u52d5\u63a7\u5236"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#_5","text":"\u7b2c\u4e8c\u7d1a\u9010\u6f38\u958b\u59cb\u5f37\u8abf\u908f\u8f2f\u63a8\u7406\uff0c\u53ef\u4ee5\u8aaa\u662f\u88dc\u8db3\u7b2c\u4e00\u7d1a\u7684\u554f\u984c\u3002\u900f\u904e\u5c07\u77e5\u8b58\u7d44\u7e54\u6210\u77e5\u8b58\u672c\u9ad4\u4e26\u8b93\u6a5f\u5668\u5f9e\u73fe\u6709\u7684\u8cc7\u8a0a\u4e2d\u53bb\u63a8\u7406\u3002\u5178\u578b\u7684\u4f8b\u5b50\u5c31\u662f\u5c08\u5bb6\u7cfb\u7d71\uff0c\u5b83\u662f\u900f\u904e\u7279\u5b9a\u9818\u57df\u7684\u5c08\u5bb6\u8a02\u5b9a\u51fa\u4e00\u5957\u77e5\u8b58\u5eab\u8207\u898f\u5247\u5eab\uff0c\u4e26\u7522\u751f\u5927\u91cf\u8f38\u5165\u8207\u8f38\u51fa\u8cc7\u6599\u7684\u6392\u5217\u7d44\u5408\u4f86\u89e3\u6c7a\u65e5\u5e38\u751f\u6d3b\u4e2d\u7684\u554f\u984c\u3002\u7576\u7136\u6240\u8b02\u7684\u5c08\u5bb6\u7cfb\u7d71\u5c31\u5fc5\u9808\u9080\u8acb\u9818\u57df\u7684\u5c08\u5bb6\u70ba\u7cfb\u7d71\u91cf\u8eab\u6253\u9020\u4e00\u5957\u7368\u4e00\u7121\u4e8c\u7684\u898f\u5247\u3002\u7136\u800c\u6bcf\u500b\u4eba\u7684\u89c0\u9ede\u53ef\u80fd\u90fd\u4e0d\u540c\uff0c\u56e0\u6b64\u4e0d\u540c\u5c08\u5bb6\u9593\u6240\u5236\u5b9a\u7684\u898f\u5247\u53ef\u80fd\u90fd\u4e0d\u592a\u4e00\u6a23\u3002","title":"\u7b2c\u4e8c\u7d1a\u4eba\u5de5\u667a\u6167\uff1a\u63a2\u7d22\u63a8\u8ad6"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#_6","text":"\u6a5f\u5668\u53ef\u4ee5\u6839\u64da\u8cc7\u6599\u5b78\u7fd2\u5982\u4f55\u5c07\u8f38\u5165\u8207\u8f38\u51fa\u8cc7\u6599\u7522\u751f\u95dc\u806f\u3002\u6a5f\u5668\u5b78\u7fd2\u662f\u4e00\u7a2e\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\uff0c\u4e26\u5f9e\u8cc7\u6599\u4e2d\u53bb\u5b
78\u7fd2\u4e26\u627e\u51fa\u554f\u984c\u7684\u89e3\u6c7a\u65b9\u6cd5\u3002\u5176\u61c9\u7528\u5305\u62ec\u641c\u5c0b\u5f15\u64ce\u3001\u5927\u6578\u64da\u5206\u6790\u7b49\u3002\u6211\u5011\u4f9d\u64da\u8cc7\u6599\u8207\u5b78\u7fd2\u65b9\u5f0f\u53ef\u5927\u81f4\u5206\u70ba\u76e3\u7763\u5f0f\u5b78\u7fd2\u3001\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u3001\u589e\u5f37\u5f0f\u5b78\u7fd2\uff0c\u6b64\u5916\u81ea\u76e3\u7763\u5b78\u7fd2\u9019\u500b\u540d\u8a5e\u6700\u8fd1\u4e5f\u71b1\u70c8\u7684\u8a0e\u8ad6\u4e2d\u3002","title":"\u7b2c\u4e09\u7d1a\u4eba\u5de5\u667a\u6167\uff1a\u6a5f\u5668\u5b78\u7fd2"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#_7","text":"\u6df1\u5ea6\u5b78\u7fd2\u662f\u4e00\u7a2e\u6a5f\u5668\u5b78\u7fd2\u7684\u65b9\u6cd5\u3002\u5b83\u85c9\u7531\u6a21\u4eff\u4eba\u985e\u5927\u8166\u795e\u7d93\u5143\u7684\u7d50\u69cb\uff0c\u5b9a\u7fa9\u89e3\u6c7a\u554f\u984c\u7684\u51fd\u5f0f\u3002\u6240\u8b02\u6df1\u5ea6\u5b78\u7fd2\u662f\u4e00\u7a2e\u5177\u6709\u6df1\u5ea6\u591a\u5c64\u7684\u795e\u7d93\u7db2\u8def\u3002\u6a5f\u5668\u53ef\u4ee5\u81ea\u884c\u5b78\u7fd2\u4e26\u4e14\u7406\u89e3\u6a5f\u5668\u5b78\u7fd2\u6642\u7528\u4ee5\u8868\u793a\u8cc7\u6599\u7684\u300c\u7279\u5fb5\u300d\uff0c\u56e0\u6b64\u53c8\u7a31\u70ba\u300c\u7279\u5fb5\u8868\u9054\u5b78\u7fd2\u300d\uff0c\u5176\u61c9\u7528\u5305\u62ec\uff1a\u5f71\u50cf\u5206\u985e\u3001\u6a5f\u5668\u7ffb\u8b6f...\u7b49\u3002","title":"\u7b2c\u56db\u7d1a\u4eba\u5de5\u667a\u6167\uff1a\u6df1\u5ea6\u5b78\u7fd2"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#_8","text":"","title":"\u6a5f\u5668\u5982\u4f55\u5b78\u7fd2\uff1f"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#supervised-learning","text":"\u6240\u8b02\u7684\u76e3\u7763\u5f0f\u5b78\u7fd2\u662f\u7d66\u8a31\u591a\u8cc7\u6599\u4e26\u7d66\u8207\u7b54\u6848\uff0c\u900f\u904e\u640d\u5931\u51fd\u6578\u8a08\u7b97\u4f86\u627e\u51fa\u4e00\u500b\u6700\u4f73\u89e3\u3002\u8209\u4e00\u500b\u7c21\u55ae\u7684\u4f8b\u5b50\uff0c\u6bd4\u5982\
u7d66\u6a5f\u5668\u5404\u770b\u4e86 1000 \u5f35\u8c93\u548c\u72d7\u7684\u7167\u7247\u5f8c\u518d\u8a62\u554f\u6a5f\u5668\u65b0\u7684\u4e00\u5f35\u7167\u7247\u4e2d\u662f\u8c93\u9084\u662f\u72d7\u3002\u4e00\u76f4\u4e0d\u65b7\u7684\u8fed\u4ee3\u8a13\u7df4\u4e26\u5f9e\u932f\u8aa4\u4e2d\u53bb\u5b78\u7fd2\uff0c\u6700\u7d42\u6a5f\u5668\u80fd\u6210\u529f\u7684\u5206\u985e\u4e86\u3002","title":"\u76e3\u7763\u5f0f\u5b78\u7fd2 (Supervised Learning)"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#unsupervised-learning","text":"\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u53ea\u7d66\u5b9a\u7279\u5fb5\uff0c\u6a5f\u5668\u6703\u60f3\u8fa6\u6cd5\u6703\u5f9e\u4e2d\u627e\u51fa\u898f\u5f8b\u3002\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u6700\u5e38\u898b\u7684\u65b9\u6cd5\u5c31\u662f\u96c6\u7fa4\u5206\u6790(Cluster Analysis)\uff0c\u76ee\u6a19\u662f\u6839\u64da\u7279\u5fb5\u5c07\u8cc7\u6599\u6a23\u672c\u5206\u70ba\u5e7e\u7fa4\u3002\u7c21\u55ae\u4f86\u8aaa\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u5c31\u662f\u7d66\u8a31\u591a\u8cc7\u6599\u4f46\u4e0d\u7d66\u4e88\u7b54\u6848\uff0c\u6a21\u578b\u6703\u5f9e\u8cc7\u6599\u4e2d\u81ea\u5df1\u53bb\u627e\u51fa\u95dc\u4fc2\u3002\u900f\u904e\u5206\u7fa4\u6f14\u7b97\u6cd5\u4f86\u8a08\u7b97\u8cc7\u6599\u8207\u8cc7\u6599\u9593\u7684\u76f8\u4f3c\u7a0b\u5ea6\u8207\u8ddd\u96e2\u3002","title":"\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2 (Unsupervised 
Learning)"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#semi-supervised-learning","text":"\u4ecb\u65bc\u76e3\u7763\u5f0f\u5b78\u7fd2\u8207\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u4e4b\u9593\u3002\u5728\u73fe\u5be6\u751f\u6d3b\u4e2d\uff0c\u672a\u6a19\u8a18\u6a23\u672c\u591a\u3001\u6709\u6a19\u8a18\u6a23\u672c\u5c11\u662f\u4e00\u500b\u6bd4\u50f9\u666e\u904d\u73fe\u8c61\uff0c\u5982\u4f55\u5229\u7528\u597d\u672a\u6a19\u8a18\u6a23\u672c\u4f86\u63d0\u5347\u6a21\u578b\u6cdb\u5316\u80fd\u529b\uff0c\u5c31\u662f\u534a\u76e3\u5f0f\u7763\u5b78\u7fd2\u7814\u7a76\u7684\u91cd\u9ede\u3002\u534a\u76e3\u5f0f\u7763\u5b78\u7fd2\u7684\u61c9\u7528\u4e3b\u8981\u5728\u65bc\u6536\u96c6\u8cc7\u6599\u5f88\u7c21\u55ae\uff0c\u4f46\u6a19\u8a18\u7684\u8cc7\u6599\u592a\u5c11\u4e86\uff0c\u6211\u5011\u5e0c\u671b\u53ef\u4ee5\u81ea\u52d5\u6a19\u8a18\u8cc7\u6599\u3002","title":"\u534a\u76e3\u7763\u5f0f\u5b78\u7fd2 (Semi-Supervised Learning)"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#reinforcement-learning","text":"\u5728\u5f37\u5316\u5f0f\u5b78\u7fd2\u4e2d\u6a5f\u5668\u6703\u9032\u884c\u4e00\u7cfb\u5217\u7684\u52d5\u4f5c\uff0c\u800c\u6bcf\u505a\u4e00\u500b\u52d5\u4f5c\u3001\u74b0\u5883\u90fd\u6703\u8ddf\u8457\u767c\u751f\u8b8a\u5316\u3002\u82e5\u74b0\u5883\u7684\u8b8a\u5316\u662f\u96e2\u76ee\u6a19\u66f4\u63a5\u8fd1\uff0c\u6211\u5011\u5c31\u6703\u7d66\u4e88\u4e00\u500b\u6b63\u5411\u53cd\u994b\u3002\u82e5\u96e2\u76ee\u6a19\u66f4\u9060\uff0c\u5247\u7d66\u4e88\u8ca0\u5411\u53cd\u994b\u3002\u6a5f\u5668\u900f\u904e\u4e0d\u65b7\u7684\u5f9e\u932f\u8aa4\u4e2d\u53bb\u5b78\u7fd2\uff0c\u6700\u7d42\u5b78\u5230\u4e86\u5982\u4f55\u53bb\u89e3\u6c7a\u4e00\u4ef6\u4e8b\u60c5\u3002","title":"\u5f37\u5316\u5f0f\u5b78\u7fd2 (Reinforcement Learning)"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#self-supervised-learning","text":"\u81ea\u76e3\u7763\u5b78\u7fd2\u662f\u7531\u5377\u7a4d\u795e\u7d93\u4e4b\u7236 Yann LeCun \u65bc 2019 
\u5e74\u6240\u63d0\u51fa\u4f86\u7684\u4e00\u7a2e\u5b78\u7fd2\u6a5f\u5236\u3002\u6b64\u5b78\u7fd2\u6a5f\u5236\u6a21\u4eff\u6a21\u4eff\u4eba\u985e\u7684\u5b78\u7fd2\u884c\u70ba\uff0c\u900f\u904e\u7576\u524d\u4efb\u52d9\u89c0\u5bdf\u6240\u5f97\u5230\u7684\u7279\u5fb5\uff0c\u4e26\u8a13\u7df4\u4e00\u500b\u76ee\u6a19\u4efb\u52d9\u7684\u6a21\u578b\u3002\u800c\u4e14\u5b78\u7fd2\u904e\u7a0b\u4e2d\u4e26\u4e0d\u4ef0\u8cf4\u4eba\u985e\u7d66\u5b9a\u7684\u6a19\u7c64\u3002\u7c21\u55ae\u4f86\u8aaa\u8a13\u7df4\u904e\u7a0b\u662f\u62ff\u4e00\u500b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u900f\u904e\u975e\u76e3\u7763\u5f0f\u6280\u5de7 pre-text task \u8a13\u7df4\u597d\u6a21\u578b\uff0c\u8a13\u7df4\u5b8c\u6210\u5f8c\u518d\u63a5\u5230\u4e0b\u6e38\u4efb\u52d9\u505a\u6700\u5f8c\u7684\u6a21\u578b\u5fae\u8abf (fine tune)\u3002","title":"\u81ea\u76e3\u7763\u5b78\u7fd2 (Self-Supervised Learning)"},{"location":"2.\u5feb\u4f86\u63a2\u7d22AI\u7684\u4e16\u754c/#ai","text":"Python \u662f\u8fd1\u5e74\u4f86\u9ad8\u901f\u6210\u9577\u4e26\u4e14\u9010\u6f38\u666e\u53ca\u7684\u7a0b\u5f0f\u8a9e\u8a00\uff0c\u4e5f\u53ef\u4ee5\u8aaa\u662f\u6700\u5bb9\u6613\u4e0a\u624b\u7684\u7a0b\u5f0f\u8a9e\u8a00\u4e4b\u4e00\u3002\u4e3b\u8981\u5728\u65bc\u5b83\u7684\u8a9e\u6cd5\u662f\u7c21\u5316\u800c\u4e0d\u8907\u96dc\u7684\uff0c\u540c\u6642\u5f37\u8abf\u7a0b\u5f0f\u78bc\u7684\u53ef\u8b80\u6027\u56e0\u6b64\u66f4\u80fd\u8cbc\u8fd1\u7a0b\u5f0f\u8a2d\u8a08\u8005\u7684\u601d\u7dad\u3002\u7576\u7136\u4e5f\u4e9b\u4eba\u4f7f\u7528 R \u8a9e\u8a00\u9032\u884c\u7d71\u8a08\u5206\u6790\u3001\u7e6a\u5716\u4ee5\u53ca\u8cc7\u6599\u63a2\u52d8\u751a\u81f3\u5efa\u6a21\u3002\u5982\u679c\u4f60\u6b63\u5728\u7336\u8c6b\u8981\u5165\u5751\u54ea\u4e00\u985e\u7a0b\u5f0f\u8a9e\u8a00\uff0c\u7b46\u8005\u9019\u88e1\u63a8\u85a6 Python \u7a0b\u5f0f\u8a9e\u8a00\u3002\u56e0\u70ba Python \u4e0d\u50c5\u53ef\u4ee5\u9032\u884c\u8cc7\u6599\u5206\u6790\u3001\u6a5f\u5668\u5b78\u7fd2\u4e5f\u80fd\u9032\u884c\u524d/\u5f8c\u7aef\u958b\u767c\u3002\u53e6\u5916 Python 
\u6709\u8c50\u5bcc\u7684\u8a0e\u8ad6\u793e\u7fa4\u4ee5\u53ca\u8a31\u591a\u958b\u6e90\u5957\u4ef6\u652f\u63f4\uff0c\u5927\u5e45\u7684\u964d\u4f4e\u5b78\u7fd2\u9580\u6abb\u3002 \u8aaa\u4e86\u9019\u9ebc\u591a\uff01\u5927\u5bb6\u6e96\u5099\u597d\u4e86\u55ce\uff1f\u5feb\u6e96\u5099\u597d\u96fb\u8166\u8207\u7b46\u8a18\u672c\uff0c\u597d\u597d\u7684\u70ba\u81ea\u5df1\u9032\u884c\u4e09\u5341\u5929\u7684\u5145\u96fb\u5427\uff5e Let's Go! \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u5b78 AI \u8a72\u7528\u54ea\u7a2e\u7a0b\u5f0f\u8a9e\u8a00\uff1f"},{"location":"20.Auto-Sklearn/","text":"[Day 20] \u6a5f\u5668\u5b78\u7fd2\u91d1\u624b\u6307 - Auto-sklearn \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u4e86\u89e3 Auto-sklearn \u904b\u4f5c\u539f\u7406 Meta Learning Bayesian Optimization Build Ensemble \u5be6\u4f5c Auto-sklearn \u63a1\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u8a13\u7df4\uff0c\u4e26\u6bd4\u8f03\u5169\u7a2e\u4e0d\u540c\u7248\u672c\u7684 Auto-sklearn\u3002 \u4f7f\u7528 pipelineprofiler \u8996\u89ba\u5316 AutoML \u6a21\u578b\u3002 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 Auto-sklearn \u63a1\u7528\u5143\u5b78\u7fd2 (Meta Learning) \u9078\u64c7\u6a21\u578b\u548c\u8d85\u53c3\u6578\u512a\u5316\u7684\u65b9\u6cd5\u4f5c\u70ba\u641c\u5c0b\u6700\u4f73\u6a21\u578b\u7684\u91cd\u9ede\u3002\u6b64 AutoML \u5957\u4ef6\u4e3b\u8981\u662f\u641c\u5c0b\u6240\u6709 Sklearn \u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4ee5\u6a21\u578b\u7684\u8d85\u53c3\u6578\uff0c\u4e26\u4f7f\u7528\u8c9d\u8449\u65af\u512a\u5316 (Bayesian Optimization) \u8207\u81ea\u52d5\u6574\u5408 (Ensemble Selection) \u7684\u67b6\u69cb\u5728\u6709\u9650\u6642\u9593\u5167\u641c\u5c0b\u6700\u4f73\u7684\u6a21\u578b\u3002\u7b2c\u4e00\u7248\u7684 Auto-sklearn \u65bc 2015 \u5e74\u767c\u8868\u5728 NIPS(Neural Information Processing Systems) \u6703\u8b70\u4e0a\uff0c\u8ad6\u6587\u540d\u7a31\u70ba Efficient and Robust Automated Machine Learning 
\u3002\u6709\u5225\u65bc\u5176\u4ed6\u7684 AutoML \u65b9\u6cd5\uff0cAuto-sklearn \u63d0\u51fa\u4e86\u5143\u5b78\u7fd2\u67b6\u69cb\u6539\u5584\u4e86\u8c9d\u8449\u65af\u512a\u5316\u5728\u521d\u59cb\u51b7\u555f\u52d5\u7684\u7f3a\u9ede\uff0c\u4e26\u63d0\u4f9b\u4e00\u500b\u597d\u7684\u63a1\u6a23\u65b9\u5411\u66f4\u5feb\u901f\u5c0b\u627e\u6700\u4f73\u7684\u6a21\u578b[1]\u3002\u7b2c\u4e8c\u500b\u7248\u672c\u65bc 2020 \u5e74\u767c\u5e03\uff0c\u8ad6\u6587\u540d\u7a31\u70ba Auto-Sklearn 2.0: Hands-free AutoML via Meta-Learning \u3002\u5728\u65b0\u7684\u7248\u672c\u4e2d\u4fee\u6539\u4e86\u5143\u5b78\u7fd2\u67b6\u69cb\uff0c\u4e26\u4e0d\u4f9d\u8cf4\u5143\u7279\u5fb5\u4f86\u9078\u64c7\u6a21\u578b\u9078\u64c7\u8207\u8abf\u53c3\u7b56\u7565\u3002\u800c\u662f\u5f15\u5165\u4e86\u4e00\u500b\u5143\u5b78\u7fd2\u7b56\u7565\u9078\u64c7\u5668\uff0c\u6839\u64da\u8cc7\u6599\u96c6\u4e2d\u7684\u6a23\u672c\u6578\u91cf\u548c\u7279\u5fb5\uff0c\u8a02\u5b9a\u4e86\u4e00\u500b\u6a21\u578b\u9078\u64c7\u7684\u7b56\u7565[3]\u3002 AutoML \u8996\u70ba CASH \u554f\u984c \u5728\u8ad6\u6587\u4e2d\u4f5c\u8005\u5c07 AutoML \u8996\u70ba\u6f14\u7b97\u6cd5\u9078\u64c7\u548c\u8d85\u53c3\u6578\u512a\u5316 (Combined Algorithm Selection and Hyperparameter, CASH) \u7684\u7d44\u5408\u6700\u4f73\u5316\u554f\u984c\u3002\u56e0\u70ba\u5728 AutoML 
\u9818\u57df\u7576\u4e2d\u5c07\u6703\u9762\u81e8\u5169\u500b\u554f\u984c\u3002\u7b2c\u4e00\u500b\u662f\u6c92\u6709\u4efb\u4f55\u7684\u6f14\u7b97\u6cd5\u6a21\u578b\u662f\u53ef\u4ee5\u4fdd\u8b49\u5728\u6240\u6709\u7684\u8cc7\u6599\u96c6\u4e2d\u8868\u73fe\u6700\u597d\uff0c\u56e0\u6b64\u6311\u9078\u4e00\u500b\u597d\u7684\u6f14\u7b97\u6cd5\u662f\u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2\u7684\u9996\u8981\u4efb\u52d9\u3002\u7b2c\u4e8c\u8a31\u591a\u7684\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u5f80\u5f80\u4f9d\u8cf4\u65bc\u8d85\u53c3\u6578\uff0c\u900f\u904e\u4e0d\u540c\u7684\u8d85\u53c3\u6578\u8a2d\u5b9a\u53ef\u4ee5\u53d6\u5f97\u66f4\u597d\u7684\u5b78\u7fd2\u7d50\u679c\u3002\u4f8b\u5982\u5728 SVM \u65b9\u6cd5\u4e2d\u6211\u5011\u53ef\u4ee5\u8a2d\u5b9a\u4e0d\u540c\u7684\u6838\u6280\u5de7\u8b93\u6a21\u578b\u5177\u6709\u975e\u7dda\u6027\u7684\u80fd\u529b\uff0c\u6216\u662f\u900f\u904e\u8d85\u53c3\u6578 C \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002\u7136\u800c\u8c9d\u8449\u65af\u512a\u5316\u5982\u4eca\u6210\u70ba AutoML \u8d85\u53c3\u6578\u641c\u5c0b\u7684\u91cd\u8981\u6838\u5fc3\u65b9\u6cd5\u3002 Auto-sklearn \u67b6\u69cb Auto-sklearn \u53ef\u4ee5\u88ab\u62ff\u4f86\u8655\u7406\u8ff4\u6b78\u548c\u5206\u985e\u7684\u554f\u984c\u3002\u4e0b\u5716\u70ba\u7b2c\u4e00\u7248\u8ad6\u6587\u4e2d\u6240\u7e6a\u88fd\u7684\u67b6\u69cb\u5716\u3002\u6211\u5011\u53ef\u4ee5\u5c07 Auto-sklearn 
\u5207\u6210\u4e09\u500b\u90e8\u5206\uff0c\u5176\u4e2d\u7b2c\u4e00\u500b\u662f\u5f15\u5165\u5143\u5b78\u7fd2\u6a5f\u5236\u4f86\u6a21\u4eff\u5c08\u5bb6\u5728\u8655\u7406\u6a5f\u5668\u5b78\u7fd2\u7684\u5148\u9a57\u77e5\u8b58\u3002\u4e26\u63a1\u7528\u5143\u7279\u5fb5\u8b93\u6211\u5011\u66f4\u6709\u6548\u7387\u7684\u53bb\u6c7a\u5b9a\u5728\u65b0\u7684\u8cc7\u6599\u96c6\u4e2d\u8a72\u6311\u9078\u54ea\u4e00\u7a2e\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u3002\u63a5\u8457\u6311\u597d\u6a21\u578b\u5f8c\u4e26\u900f\u904e\u8c9d\u8449\u65af\u512a\u5316\u4f86\u6311\u9078\u5408\u9069\u7684\u6a21\u578b\u8d85\u53c3\u6578\uff0c\u4ee5\u53ca\u5617\u8a66\u4e00\u4e9b\u8cc7\u6599\u524d\u8655\u7406\u8207\u7279\u5fb5\u5de5\u7a0b\u3002\u6700\u5f8c\u6311\u9078\u5e7e\u500b\u4e0d\u932f\u7684\u6a21\u578b\u4e26\u900f\u904e\u6574\u9ad4\u5b78\u7fd2\u7684\u6280\u5de7\u9032\u884c\u6a21\u578b\u5806\u758a\uff0c\u5c07\u8868\u73fe\u4e0d\u932f\u7684\u6a21\u578b\u8f38\u51fa\u7d50\u679c\u505a\u4e00\u500b\u52a0\u6b0a\u548c\u6216\u662f\u6295\u7968\u3002 Meta Learning Bayesian Optimization Build Ensemble Meta Learning \u7576\u6211\u5011\u60f3\u5c0d\u65b0\u8cc7\u6599\u96c6\u505a\u5206\u985e\u6216\u8ff4\u6b78\u6642\uff0cAuto-sklearn \u6703\u5148\u63d0\u53d6\u5143\u7279\u5fb5\uff0c\u5177\u6709\u76f8\u4f3c\u5143\u7279\u5fb5\u7684\u8cc7\u6599\u96c6\u5728\u540c\u4e00\u7d44\u8d85\u53c3\u6578\u61c9\u8a72\u6703\u6709\u76f8\u4f3c\u7684\u8868\u73fe\u3002\u56e0\u6b64\u900f\u904e\u5143\u7279\u5fb5\u53ef\u4ee5\u6709\u6548\u5730\u8a55\u4f30\u5728\u65b0\u8cc7\u6599\u96c6\u4e0a\u61c9\u8a72\u4f7f\u7528\u54ea\u7a2e\u7b97\u6cd5\u3002\u5143\u5b78\u7fd2\u5728\u9019\u88e1\u7684\u76ee\u7684\u662f\u70ba\u4e86\u8981\u627e\u4e00\u500b\u4e0d\u932f\u7684\u8d85\u53c3\u6578\u505a\u521d\u59cb\u5316\uff0c\u4f7f\u5176\u5728\u4e00\u958b\u59cb\u7684\u8868\u73fe\u512a\u65bc\u96a8\u6a5f\u7684\u65b9\u6cd5\u3002\u4e26\u63d0\u4f9b\u8c9d\u8449\u65af\u512a\u5316\u6709\u500b\u660e\u78ba\u7684\u65b9\u5411\u3002Auto-sklearn \u53c3\u8003\u4e86 OpenML 140 
\u500b\u8cc7\u6599\u96c6\uff0c\u4e26\u5f59\u6574\u4e86 38 \u500b\u5143\u7279\u5fb5\uff0c\u4f8b\u5982\uff1a\u504f\u5ea6\u3001\u5cf0\u5ea6\u3001\u7279\u5fb5\u6578\u91cf\u3001\u985e\u5225\u6578\u91cf......\u7b49\u3002\u9996\u5148\u70ba\u9019 140 \u500b\u8cc7\u6599\u96c6\u4f7f\u7528\u8c9d\u8449\u65af\u512a\u5316\u9032\u884c\u6a21\u578b\u8a13\u7df4\uff0c\u4e26\u5c07\u9019\u4e9b\u8cc7\u6599\u96c6\u5c0d\u61c9\u7684\u6a21\u578b\u8207\u6700\u4f73\u7684\u8d85\u53c3\u6578\u5132\u5b58\u8d77\u4f86\u3002\u7576\u6709\u65b0\u7684\u8cc7\u6599\u96c6\u9032\u4f86\u6642\u6703\u5148\u900f\u904e\u5143\u7279\u5fb5\u9032\u884c\u76f8\u4f3c\u5ea6\u5339\u914d\uff0c\u4e26\u5c07\u5339\u914d\u7a0b\u5ea6\u6700\u9ad8\u7684\u524d k \u500b\u8cc7\u6599\u96c6 (\u9810\u8a2dk=25) \u6240\u5c0d\u61c9\u7684\u6a21\u578b\u548c\u8d85\u53c3\u6578\u4f5c\u70ba\u8c9d\u8449\u65af\u512a\u5316\u7684\u521d\u59cb\u8a2d\u5b9a\u3002 Bayesian Optimization \u5728\u8c9d\u8449\u65af\u512a\u5316\u7576\u4e2d\u4e3b\u8981\u6703\u5c0b\u627e\u8a72\u8cc7\u6599\u96c6\u4e2d\u6700\u5408\u9069\u7684\u8cc7\u6599\u524d\u8655\u7406 (data pre-processors)\u3001\u7279\u5fb5\u524d\u8655\u7406 (feature pre-processors) \u8207\u5206\u985e/\u8ff4\u6b78\u6a21\u578b\u3002\u4ee5\u4e0a\u4e09\u5927\u985e\u5408\u8a08\u5171\u6709 110 \u500b\u8d85\u53c3\u6578\u5fc5\u9808\u900f\u904e\u8c9d\u8449\u65af\u512a\u5316\u4f86\u5c0b\u627e\u6700\u9069\u5408\u7684\u53c3\u6578\u7d44\u5408\u3002\u5176\u8c9d\u8449\u65af\u512a\u5316\u4e3b\u8981\u65b9\u6cd5\u662f\u900f\u904e\u5efa\u7acb\u76ee\u6a19\u51fd\u6578\u7684\u6a5f\u7387\u6a21\u578b\uff0c\u4e26\u7528\u5b83\u4f86\u9078\u64c7\u6700\u6709\u5e0c\u671b\u7684\u8d85\u53c3\u6578\u4f86\u8a55\u4f30\u771f\u5be6\u7684\u76ee\u6a19\u51fd\u6578\u3002 \u4ee5\u4e0b\u5167\u5bb9\u6458\u9304\u81ea Auto-sklearn v1.0 \u8ad6\u6587\u63d0\u4f9b\u7684\u5167\u5bb9 [1][2] Data Pre-processors \u5728\u8cc7\u6599\u524d\u8655\u7406\u90e8\u5206 Auto-sklearn 
\u63d0\u4f9b\u4e86\u56db\u7a2e\u65b9\u6cd5\u3002\u5305\u542b\u7279\u5fb5\u7e2e\u653e\u3001\u586b\u88dc\u7f3a\u5931\u503c\u3001\u985e\u5225\u7279\u5fb5\u9032\u884c one-hot encoding \u8207\u8655\u7406\u76ee\u6a19\u8f38\u51fa\u985e\u5225\u6578\u91cf\u4e0d\u5e73\u8861\u554f\u984c\u3002 Data Pre-processors \u7279\u5fb5\u7e2e\u653e \u586b\u88dc\u7f3a\u5931\u503c one-hot encoding \u985e\u5225\u8cc7\u6599\u4e0d\u5e73\u8861 \u5728\u65b0\u7684\u7248\u672c\u4e2d\u591a\u4e86\u4e00\u4e9b\u8cc7\u6599\u524d\u8655\u7406\u65b9\u6cd5\uff0c\u8a73\u7d30\u53ef\u4ee5\u53c3\u8003 Auto-sklearn data_preprocessing \u7684\u539f\u59cb\u7a0b\u5f0f\u3002 Feature Pre-processors \u5728\u7279\u5fb5\u524d\u8655\u7406\u90e8\u5206 Auto-sklearn \u63d0\u4f9b\u4e86 12 \u7a2e\u7279\u5fb5\u8655\u7406\u7684\u6280\u5de7\uff0c\u7136\u800c\u5728\u773e\u591a\u65b9\u6cd5\u4e2d\u50c5\u6703\u6311\u9078\u5176\u4e2d\u4e00\u7a2e\u3002 \u8a73\u7d30\u53ef\u4ee5\u53c3\u8003 Auto-sklearn feature_preprocessing \u7684\u539f\u59cb\u7a0b\u5f0f\u3002 Build Ensemble \u5728 Auto-sklearn \u8a13\u7df4\u968e\u6bb5\u6703\u7522\u751f\u8a31\u591a\u8868\u73fe\u512a\u826f\u7684\u6a21\u578b\uff0c\u6700\u7d42\u900f\u904e\u8caa\u5a6a\u6cd5\u7684 Bagging Ensemble Selection 
\u65b9\u6cd5\u4f86\u5408\u4f75\u591a\u500b\u6a21\u578b\u7d44\u5408\u6210\u4e00\u500b\u66f4\u5f37\u66f4\u5927\u7684\u6a21\u578b\uff0c\u4e26\u63d0\u9ad8\u9810\u6e2c\u7684\u6e96\u78ba\u6027\u3002\u4e0b\u5716\u70ba\u7b2c\u4e00\u7248\u8ad6\u6587\u4e2d\u9032\u884c\u7684\u5be6\u9a57\uff0c\u5176\u4e2d\u6a6b\u8ef8\u70ba\u7a0b\u5f0f\u57f7\u884c\u6642\u9593\uff0c\u7e31\u8ef8\u70ba\u5728\u6642\u9593\u5167\u641c\u5c0b\u5230\u7684\u6700\u4f73\u6a21\u578b\u7684\u6392\u540d\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7da0\u8272\u7dda\u689d\u518d\u52a0\u5165\u4e86\u6574\u9ad4\u5b78\u7fd2\u6a5f\u5236\u8868\u73fe\u6548\u679c\u6bd4\u5c1a\u672a\u52a0\u5165\u7684\u85cd\u8272\u7dda\u689d\u5be6\u9a57\u4f86\u5f97\u597d\u3002\u4e26\u4e14\u5728\u77ed\u6642\u9593\u5167\u5c31\u53ef\u4ee5\u5f97\u5230\u4e0d\u932f\u7684\u7d50\u679c\u3002 \u5b89\u88dd Auto-sklearn \u76ee\u524d Auto-sklearn \u50c5\u652f\u63f4 Lunux \u7cfb\u7d71\u3002\u82e5\u6c92\u6709\u6b64\u7cfb\u7d71\u7684\u8b80\u8005\u53ef\u4ee5\u900f\u904e Colab \u9ad4\u9a57\u3002\u53e6\u5916\u82e5\u5b89\u88dd\u904e\u7a0b\u4e2d\u51fa\u73fe\u932f\u8aa4\uff0c\u5fc5\u9808\u5148\u78ba\u8a8d swig \u662f\u5426\u5df2\u5b8c\u6210\u5b89\u88dd\u3002 pip install auto-sklearn \u82e5\u4f7f\u7528 Colab \u57f7\u884c\uff0c\u5b89\u88dd\u5b8c\u6210\u5f8c\u9ede\u9078\u4e0a\u65b9\u5de5\u5177\u5217 Runtime -> Restart runtime \u91cd\u555f\u624d\u80fd\u6b63\u5e38\u57f7\u884c\u6b64\u5957\u4ef6\u3002 \u8f09\u5165\u8cc7\u6599\u96c6 \u672c\u6b21\u7bc4\u4f8b\u6cbf\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\uff0c\u4e26\u4f7f\u7528 Auto-sklearn \u4f86\u641c\u5c0b\u6700\u4f73\u7684\u5206\u985e\u5668\u6a21\u578b\u3002\u6b64\u5916\u5927\u5bb6\u53ef\u4ee5\u8a66\u8457\u89c0\u5bdf Auto-sklearn \u627e\u5230\u7684\u6700\u4f73\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u4e0a\u7684\u8868\u73fe\uff0c\u4e26\u8207\u524d\u5e7e\u5929\u6240\u4ecb\u7d39\u7684\u90a3\u4e9b\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4f86\u505a\u6bd4\u8f03\u3002 import pandas as pd import 
numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris iris = load_iris () df_data = pd . DataFrame ( data = np . c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data \u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6 \u6211\u5011\u6309\u7167\u82b1\u6735\u7a2e\u985e\u7684\u6578\u91cf\u5c0d\u8cc7\u6599\u96c6\u4ee5 7:3 \u7684\u6bd4\u4f8b\u5207\u5272\u51fa\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u3002\u5176\u4e2d\u53c3\u6578 stratify=y \u8a2d\u5b9a\u662f\u78ba\u4fdd\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u5c0d\u65bc\u4e09\u7a2e\u82b1\u6735\u985e\u5225\u7684\u6bd4\u4f8b\u5728\u9019\u5169\u500b\u5207\u51fa\u4f86\u7684\u8cc7\u6599\u96c6\u4e2d\u6bd4\u4f8b\u8981\u4e00\u6a23\uff0c\u4ee5\u514d\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u6709\u5f88\u5927\u7684\u504f\u5dee\u3002 from sklearn.model_selection import train_test_split X = df_data . drop ( labels = [ 'Species' ], axis = 1 ) . values # \u79fb\u9664Species\u4e26\u53d6\u5f97\u5269\u4e0b\u6b04\u4f4d\u8cc7\u6599 y = df_data [ 'Species' ] . values X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.3 , random_state = 42 , stratify = y ) print ( 'train shape:' , X_train . shape ) print ( 'test shape:' , X_test . 
shape ) \u8f38\u51fa\u7d50\u679c\uff1a train shape: (105, 4) test shape: (45, 4) Auto-sklearn \u4ee5\u4e0b\u662f\u6a21\u578b\u5e38\u7528\u7684\u8d85\u53c3\u6578\u4ee5\u53ca\u65b9\u6cd5\uff0c\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u5b98\u65b9 API \u6587\u4ef6 \u3002 Parameters: - time_left_for_this_task: \u641c\u5c0b\u6642\u9593(\u79d2)\uff0c\u9810\u8a2d3600\u79d2(6\u5206\u9418)\u3002 - per_run_time_limit: \u6bcf\u500b\u6a21\u578b\u8a13\u7df4\u7684\u4e0a\u9650\u6642\u9593\uff0c\u9810\u8a2d\u70batime_left_for_this_task\u76841/10\u3002 - ensemble_size: \u6a21\u578b\u8f38\u51fa\u6578\u91cf\uff0c\u9810\u8a2d50\u3002 - resampling_strategy: \u8cc7\u6599\u63a1\u6a23\u65b9\u5f0f\u3002\u70ba\u4e86\u907f\u514d\u904e\u64ec\u5408\uff0c\u53ef\u4ee5\u63a1\u7528\u4ea4\u53c9\u9a57\u8b49\u6a5f\u5236\u3002\u9810\u8a2d\u65b9\u6cd5\u70ba\u6700\u57fa\u672c\u7684 holdout\u3002 Attributes: - cv_results_: \u67e5\u8a62\u6a21\u578b\u641c\u5c0b\u7d50\u679c\u4ee5\u53ca\u6bcf\u500b\u6700\u4f73\u6a21\u578b\u7684\u8d85\u53c3\u6578\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - refit: \u4f7f\u7528 fit \u5c0b\u627e\u597d\u7684\u53c3\u6578\u5f8c\uff0c\u518d\u4f7f\u7528\u6240\u6709\u7684\u8cc7\u6599\u9032\u884c\u6700\u5f8c\u5fae\u8abf\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 - leaderboard: \u986f\u793a k \u500b ensemble \u6a21\u578b\u4e26\u6392\u540d\u3002 \u9996\u5148\u6211\u5011\u4f86\u6e2c\u8a66\u7b2c\u4e00\u7248\u7684 Auto-sklearn\uff0c\u5efa\u7acb\u4e00\u500b\u5206\u985e\u5668\u985e\u578b\u7684\u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u4e26\u8a2d\u5b9a\u76f8\u95dc\u7684\u57f7\u884c\u53c3\u6578\u3002\u5728\u672c\u6b21\u5be6\u9a57\u4e2d\u6211\u5011\u8a2d\u5b9a\u6a21\u578b\u641c\u5c0b\u7e3d\u6642\u9593\u70ba 180 
\u79d2\uff0c\u6bcf\u500b\u6a21\u578b\u8a13\u7df4\u6642\u9593\u9650\u5236 40 \u79d2\u5167\u3002\u6b64\u5916\u8a2d\u5b9a resampling_strategy='cv' \u5373 K-Fold \u4ea4\u53c9\u9a57\u8b49\u3002\u6b64\u5916\u5fc5\u9808\u53e6\u5916\u8a2d\u5b9a resampling_strategy_arguments \u4e26\u7d66\u4e88 k=5\uff0c\u8a13\u7df4\u96c6\u5207\u5272\u70ba\u4e94\u7b49\u4efd\u3002\u9019\u610f\u5473\u8457\u76f8\u540c\u7684\u6a21\u578b\u8981\u8a13\u7df4\u4e94\u6b21\uff0c\u6bcf\u4e00\u6b21\u7684\u8a13\u7df4\u90fd\u6703\u5f9e\u9019\u4e94\u7b49\u4efd\u6311\u9078\u5176\u4e2d\u56db\u7b49\u4efd\u4f5c\u70ba\u8a13\u7df4\u8cc7\u6599\uff0c\u5269\u4e0b\u4e00\u7b49\u4efd\u672a\u53c3\u8207\u8a13\u7df4\u4e26\u4f5c\u70ba\u9a57\u8b49\u96c6\u3002 import autosklearn.classification automlclassifierV1 = autosklearn . classification . AutoSklearnClassifier ( time_left_for_this_task = 180 , per_run_time_limit = 40 , resampling_strategy = 'cv' , resampling_strategy_arguments = { 'folds' : 5 } ) automlclassifierV1 . fit ( X_train , y_train ) \u8a13\u7df4\u7d50\u675f\u5f8c\u6211\u5011\u53ef\u4ee5\u4f86\u67e5\u770b\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u8868\u73fe\u3002\u5927\u5bb6\u53ef\u4ee5\u8a66\u8457\u8abf\u6574\u6a21\u578b\u8a13\u7df4\u6642\u9593\u4ee5\u53ca\u4e00\u4e9b\u63a7\u5236\u53c3\u6578\uff0c\u67e5\u770b\u662f\u5426\u6709\u6c92\u6709\u5e6b\u52a9\u6a21\u578b\u6e96\u78ba\u5ea6\u63d0\u5347\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( 'automlclassifierV1 \u8a13\u7df4\u96c6: ' , automlclassifierV1 . score ( X_train , y_train )) print ( 'automlclassifierV1 \u6e2c\u8a66\u96c6: ' , automlclassifierV1 . 
score ( X_test , y_test )) \u8f38\u51fa\u7d50\u679c\uff1a automlclassifierV1 \u8a13\u7df4\u96c6: 0.9904761904761905 automlclassifierV1 \u6e2c\u8a66\u96c6: 0.9111111111111111 \u4f7f\u7528 Auto-sklearn 2.0 \u5728\u7b2c\u4e8c\u7248\u7684 Auto-sklearn \u5c0d\u6a21\u578b\u641c\u5c0b\u9032\u884c\u4e86\u4e00\u4e9b\u512a\u5316\uff0c\u4e26\u4e14\u53ef\u4ee5\u81ea\u52d5\u641c\u5c0b\u597d\u7684\u8cc7\u6599\u63a1\u6a23\u65b9\u5f0f\u3002\u56e0\u6b64\u6211\u5011\u4e0d\u7279\u5730\u53bb\u6307\u5b9a resampling_strategy \uff0c\u67e5\u770b\u8868\u73fe\u662f\u5426\u80fd\u5920\u63d0\u5347\u3002 from autosklearn.experimental.askl2 import AutoSklearn2Classifier automlclassifierV2 = AutoSklearn2Classifier ( time_left_for_this_task = 180 , per_run_time_limit = 40 ) automlclassifierV2 . fit ( X_train , y_train ) # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( 'automlclassifierV2 \u8a13\u7df4\u96c6: ' , automlclassifierV2 . score ( X_train , y_train )) print ( 'automlclassifierV2 \u6e2c\u8a66\u96c6: ' , automlclassifierV2 . 
score ( X_test , y_test )) \u57f7\u884c\u7d50\u679c\uff1a automlclassifierV2 \u8a13\u7df4\u96c6: 0.9904761904761905 automlclassifierV2 \u6e2c\u8a66\u96c6: 0.9333333333333333 \u4f7f\u7528\u4e00\u6a23\u7684\u641c\u5c0b\u6642\u9593\u8207\u8a13\u7df4\u9650\u5236\uff0c\u6700\u7d42\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u90fd\u8868\u73fe\u4e0d\u932f\u3002\u5169\u8005\u7684\u6e96\u78ba\u7387\u66f4\u63a5\u8fd1\u4e86\u3002\u9019\u6a23\u7684\u7d50\u679c\u7684\u78ba\u6bd4\u7cfb\u5217\u6559\u5b78\u6240\u4ecb\u7d39\u7684\u4efb\u4e00\u500b\u55ae\u4e00\u6a21\u578b\u9084\u4f86\u5f97\u597d\u3002 \u67e5\u770b\u6bcf\u500b\u6a21\u578b\u7684\u6b0a\u91cd \u6211\u5011\u53ef\u4ee5\u4f7f\u7528\u6a21\u578b\u63d0\u4f9b\u7684\u65b9\u6cd5\u67e5\u770b\u6700\u7d42\u8a13\u7df4\u7d50\u679c\uff0c\u4e26\u67e5\u770b k \u500b Ensemble \u6a21\u578b\u7684\u8a13\u7df4\u7d50\u679c\u4ee5\u53ca\u6bcf\u500b\u6a21\u578b\u7684\u6b0a\u91cd\u3002 automlclassifierV2 . leaderboard ( detailed = True , ensemble_only = True ) \u8f38\u51fa\u6a21\u578b \u5982\u679c\u60f3\u5c07 AutoML \u7684\u6a21\u578b\u5132\u5b58\u8d77\u4f86\uff0c\u53ef\u4ee5\u900f\u904e joblib \u5c07\u6a21\u578b\u6253\u5305\u532f\u51fa\u3002 from joblib import dump , load # \u532f\u51fa\u6a21\u578b dump ( automlclassifierV2 , 'model.joblib' ) # \u532f\u5165\u6a21\u578b clf = load ( 'model.joblib' ) # \u6a21\u578b\u9810\u6e2c\u6e2c\u8a66 clf . predict ( X_test ) \u8996\u89ba\u5316 AutoML \u6a21\u578b \u9996\u5148\u5b89\u88dd pipelineprofiler \u3002 pip install pipelineprofiler \u900f\u904e PipelineProfiler \u5957\u4ef6\u53ef\u4ee5\u5f88\u5feb\u901f\u5730\u6aa2\u8996\u6a21\u578b\u8a13\u7df4\u7d50\u679c\uff0c\u4ee5\u53ca\u6bcf\u4e00\u500b Ensemble \u6a21\u578b\u7684\u8d85\u53c3\u6578\u4ee5\u53ca\u8cc7\u6599\u524d\u8655\u7406\u65b9\u5f0f\u548c\u7279\u5fb5\u8655\u7406\u65b9\u6cd5\u3002 import PipelineProfiler profiler_data = PipelineProfiler . import_autosklearn ( automlclassifierV2 ) PipelineProfiler . 
plot_pipeline_matrix ( profiler_data ) Reference [1] Feurer, Matthias et al. Efficient and Robust Automated Machine Learning , Advances in neural information processing systems 2015. [2] Feurer, Matthias et al. Supplementary Material for Efficient and Robust Automated Machine Learning , Advances in neural information processing systems 2015. [3] Feurer, Matthias et al. Auto-Sklearn 2.0: Hands-free AutoML via Meta-Learning , arXiv, 2020. [4] Ono, Jorge et al. PipelineProfiler: A Visual Analytics Tool for the Exploration of AutoML Pipelines , arXiv, 2020. Auto Machine Learning\u7b46\u8a18- Bayesian Optimization A Quickstart Guide to Auto-Sklearn (AutoML) for Machine Learning Practitioners Auto-Sklearn: Scikit-Learn on Steroids \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 20] \u6a5f\u5668\u5b78\u7fd2\u91d1\u624b\u6307 - Auto-sklearn"},{"location":"20.Auto-Sklearn/#day-20-auto-sklearn","text":"","title":"[Day 20] \u6a5f\u5668\u5b78\u7fd2\u91d1\u624b\u6307 - Auto-sklearn"},{"location":"20.Auto-Sklearn/#_1","text":"\u4e86\u89e3 Auto-sklearn \u904b\u4f5c\u539f\u7406 Meta Learning Bayesian Optimization Build Ensemble \u5be6\u4f5c Auto-sklearn \u63a1\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u8a13\u7df4\uff0c\u4e26\u6bd4\u8f03\u5169\u7a2e\u4e0d\u540c\u7248\u672c\u7684 Auto-sklearn\u3002 \u4f7f\u7528 pipelineprofiler \u8996\u89ba\u5316 AutoML \u6a21\u578b\u3002 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"20.Auto-Sklearn/#_2","text":"Auto-sklearn \u63a1\u7528\u5143\u5b78\u7fd2 (Meta Learning) \u9078\u64c7\u6a21\u578b\u548c\u8d85\u53c3\u6578\u512a\u5316\u7684\u65b9\u6cd5\u4f5c\u70ba\u641c\u5c0b\u6700\u4f73\u6a21\u578b\u7684\u91cd\u9ede\u3002\u6b64 AutoML \u5957\u4ef6\u4e3b\u8981\u662f\u641c\u5c0b\u6240\u6709 Sklearn 
\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4ee5\u6a21\u578b\u7684\u8d85\u53c3\u6578\uff0c\u4e26\u4f7f\u7528\u8c9d\u8449\u65af\u512a\u5316 (Bayesian Optimization) \u8207\u81ea\u52d5\u6574\u5408 (Ensemble Selection) \u7684\u67b6\u69cb\u5728\u6709\u9650\u6642\u9593\u5167\u641c\u5c0b\u6700\u4f73\u7684\u6a21\u578b\u3002\u7b2c\u4e00\u7248\u7684 Auto-sklearn \u65bc 2015 \u5e74\u767c\u8868\u5728 NIPS(Neural Information Processing Systems) \u6703\u8b70\u4e0a\uff0c\u8ad6\u6587\u540d\u7a31\u70ba Efficient and Robust Automated Machine Learning \u3002\u6709\u5225\u65bc\u5176\u4ed6\u7684 AutoML \u65b9\u6cd5\uff0cAuto-sklearn \u63d0\u51fa\u4e86\u5143\u5b78\u7fd2\u67b6\u69cb\u6539\u5584\u4e86\u8c9d\u8449\u65af\u512a\u5316\u5728\u521d\u59cb\u51b7\u555f\u52d5\u7684\u7f3a\u9ede\uff0c\u4e26\u63d0\u4f9b\u4e00\u500b\u597d\u7684\u63a1\u6a23\u65b9\u5411\u66f4\u5feb\u901f\u5c0b\u627e\u6700\u4f73\u7684\u6a21\u578b[1]\u3002\u7b2c\u4e8c\u500b\u7248\u672c\u65bc 2020 \u5e74\u767c\u5e03\uff0c\u8ad6\u6587\u540d\u7a31\u70ba Auto-Sklearn 2.0: Hands-free AutoML via Meta-Learning \u3002\u5728\u65b0\u7684\u7248\u672c\u4e2d\u4fee\u6539\u4e86\u5143\u5b78\u7fd2\u67b6\u69cb\uff0c\u4e26\u4e0d\u4f9d\u8cf4\u5143\u7279\u5fb5\u4f86\u9078\u64c7\u6a21\u578b\u9078\u64c7\u8207\u8abf\u53c3\u7b56\u7565\u3002\u800c\u662f\u5f15\u5165\u4e86\u4e00\u500b\u5143\u5b78\u7fd2\u7b56\u7565\u9078\u64c7\u5668\uff0c\u6839\u64da\u8cc7\u6599\u96c6\u4e2d\u7684\u6a23\u672c\u6578\u91cf\u548c\u7279\u5fb5\uff0c\u8a02\u5b9a\u4e86\u4e00\u500b\u6a21\u578b\u9078\u64c7\u7684\u7b56\u7565[3]\u3002","title":"\u524d\u8a00"},{"location":"20.Auto-Sklearn/#automl-cash","text":"\u5728\u8ad6\u6587\u4e2d\u4f5c\u8005\u5c07 AutoML \u8996\u70ba\u6f14\u7b97\u6cd5\u9078\u64c7\u548c\u8d85\u53c3\u6578\u512a\u5316 (Combined Algorithm Selection and Hyperparameter, CASH) \u7684\u7d44\u5408\u6700\u4f73\u5316\u554f\u984c\u3002\u56e0\u70ba\u5728 AutoML 
\u9818\u57df\u7576\u4e2d\u5c07\u6703\u9762\u81e8\u5169\u500b\u554f\u984c\u3002\u7b2c\u4e00\u500b\u662f\u6c92\u6709\u4efb\u4f55\u7684\u6f14\u7b97\u6cd5\u6a21\u578b\u662f\u53ef\u4ee5\u4fdd\u8b49\u5728\u6240\u6709\u7684\u8cc7\u6599\u96c6\u4e2d\u8868\u73fe\u6700\u597d\uff0c\u56e0\u6b64\u6311\u9078\u4e00\u500b\u597d\u7684\u6f14\u7b97\u6cd5\u662f\u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2\u7684\u9996\u8981\u4efb\u52d9\u3002\u7b2c\u4e8c\u8a31\u591a\u7684\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u5f80\u5f80\u4f9d\u8cf4\u65bc\u8d85\u53c3\u6578\uff0c\u900f\u904e\u4e0d\u540c\u7684\u8d85\u53c3\u6578\u8a2d\u5b9a\u53ef\u4ee5\u53d6\u5f97\u66f4\u597d\u7684\u5b78\u7fd2\u7d50\u679c\u3002\u4f8b\u5982\u5728 SVM \u65b9\u6cd5\u4e2d\u6211\u5011\u53ef\u4ee5\u8a2d\u5b9a\u4e0d\u540c\u7684\u6838\u6280\u5de7\u8b93\u6a21\u578b\u5177\u6709\u975e\u7dda\u6027\u7684\u80fd\u529b\uff0c\u6216\u662f\u900f\u904e\u8d85\u53c3\u6578 C \u9650\u5236\u6a21\u578b\u7684\u8907\u96dc\u5ea6\u9632\u6b62\u904e\u5ea6\u64ec\u5408\u3002\u7136\u800c\u8c9d\u8449\u65af\u512a\u5316\u5982\u4eca\u6210\u70ba AutoML \u8d85\u53c3\u6578\u641c\u5c0b\u7684\u91cd\u8981\u6838\u5fc3\u65b9\u6cd5\u3002","title":"AutoML \u8996\u70ba CASH \u554f\u984c"},{"location":"20.Auto-Sklearn/#auto-sklearn","text":"Auto-sklearn \u53ef\u4ee5\u88ab\u62ff\u4f86\u8655\u7406\u8ff4\u6b78\u548c\u5206\u985e\u7684\u554f\u984c\u3002\u4e0b\u5716\u70ba\u7b2c\u4e00\u7248\u8ad6\u6587\u4e2d\u6240\u7e6a\u88fd\u7684\u67b6\u69cb\u5716\u3002\u6211\u5011\u53ef\u4ee5\u5c07 Auto-sklearn 
\u5207\u6210\u4e09\u500b\u90e8\u5206\uff0c\u5176\u4e2d\u7b2c\u4e00\u500b\u662f\u5f15\u5165\u5143\u5b78\u7fd2\u6a5f\u5236\u4f86\u6a21\u4eff\u5c08\u5bb6\u5728\u8655\u7406\u6a5f\u5668\u5b78\u7fd2\u7684\u5148\u9a57\u77e5\u8b58\u3002\u4e26\u63a1\u7528\u5143\u7279\u5fb5\u8b93\u6211\u5011\u66f4\u6709\u6548\u7387\u7684\u53bb\u6c7a\u5b9a\u5728\u65b0\u7684\u8cc7\u6599\u96c6\u4e2d\u8a72\u6311\u9078\u54ea\u4e00\u7a2e\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u3002\u63a5\u8457\u6311\u597d\u6a21\u578b\u5f8c\u4e26\u900f\u904e\u8c9d\u8449\u65af\u512a\u5316\u4f86\u6311\u9078\u5408\u9069\u7684\u6a21\u578b\u8d85\u53c3\u6578\uff0c\u4ee5\u53ca\u5617\u8a66\u4e00\u4e9b\u8cc7\u6599\u524d\u8655\u7406\u8207\u7279\u5fb5\u5de5\u7a0b\u3002\u6700\u5f8c\u6311\u9078\u5e7e\u500b\u4e0d\u932f\u7684\u6a21\u578b\u4e26\u900f\u904e\u6574\u9ad4\u5b78\u7fd2\u7684\u6280\u5de7\u9032\u884c\u6a21\u578b\u5806\u758a\uff0c\u5c07\u8868\u73fe\u4e0d\u932f\u7684\u6a21\u578b\u8f38\u51fa\u7d50\u679c\u505a\u4e00\u500b\u52a0\u6b0a\u548c\u6216\u662f\u6295\u7968\u3002 Meta Learning Bayesian Optimization Build Ensemble","title":"Auto-sklearn \u67b6\u69cb"},{"location":"20.Auto-Sklearn/#meta-learning","text":"\u7576\u6211\u5011\u60f3\u5c0d\u65b0\u8cc7\u6599\u96c6\u505a\u5206\u985e\u6216\u8ff4\u6b78\u6642\uff0cAuto-sklearn 
\u6703\u5148\u63d0\u53d6\u5143\u7279\u5fb5\uff0c\u5177\u6709\u76f8\u4f3c\u5143\u7279\u5fb5\u7684\u8cc7\u6599\u96c6\u5728\u540c\u4e00\u7d44\u8d85\u53c3\u6578\u61c9\u8a72\u6703\u6709\u76f8\u4f3c\u7684\u8868\u73fe\u3002\u56e0\u6b64\u900f\u904e\u5143\u7279\u5fb5\u53ef\u4ee5\u6709\u6548\u5730\u8a55\u4f30\u5728\u65b0\u8cc7\u6599\u96c6\u4e0a\u61c9\u8a72\u4f7f\u7528\u54ea\u7a2e\u7b97\u6cd5\u3002\u5143\u5b78\u7fd2\u5728\u9019\u88e1\u7684\u76ee\u7684\u662f\u70ba\u4e86\u8981\u627e\u4e00\u500b\u4e0d\u932f\u7684\u8d85\u53c3\u6578\u505a\u521d\u59cb\u5316\uff0c\u4f7f\u5176\u5728\u4e00\u958b\u59cb\u7684\u8868\u73fe\u512a\u65bc\u96a8\u6a5f\u7684\u65b9\u6cd5\u3002\u4e26\u63d0\u4f9b\u8c9d\u8449\u65af\u512a\u5316\u6709\u500b\u660e\u78ba\u7684\u65b9\u5411\u3002Auto-sklearn \u53c3\u8003\u4e86 OpenML 140 \u500b\u8cc7\u6599\u96c6\uff0c\u4e26\u5f59\u6574\u4e86 38 \u500b\u5143\u7279\u5fb5\uff0c\u4f8b\u5982\uff1a\u504f\u5ea6\u3001\u5cf0\u5ea6\u3001\u7279\u5fb5\u6578\u91cf\u3001\u985e\u5225\u6578\u91cf......\u7b49\u3002\u9996\u5148\u70ba\u9019 140 \u500b\u8cc7\u6599\u96c6\u4f7f\u7528\u8c9d\u8449\u65af\u512a\u5316\u9032\u884c\u6a21\u578b\u8a13\u7df4\uff0c\u4e26\u5c07\u9019\u4e9b\u8cc7\u6599\u96c6\u5c0d\u61c9\u7684\u6a21\u578b\u8207\u6700\u4f73\u7684\u8d85\u53c3\u6578\u5132\u5b58\u8d77\u4f86\u3002\u7576\u6709\u65b0\u7684\u8cc7\u6599\u96c6\u9032\u4f86\u6642\u6703\u5148\u900f\u904e\u5143\u7279\u5fb5\u9032\u884c\u76f8\u4f3c\u5ea6\u5339\u914d\uff0c\u4e26\u5c07\u5339\u914d\u7a0b\u5ea6\u6700\u9ad8\u7684\u524d k \u500b\u8cc7\u6599\u96c6 (\u9810\u8a2dk=25) \u6240\u5c0d\u61c9\u7684\u6a21\u578b\u548c\u8d85\u53c3\u6578\u4f5c\u70ba\u8c9d\u8449\u65af\u512a\u5316\u7684\u521d\u59cb\u8a2d\u5b9a\u3002","title":"Meta Learning"},{"location":"20.Auto-Sklearn/#bayesian-optimization","text":"\u5728\u8c9d\u8449\u65af\u512a\u5316\u7576\u4e2d\u4e3b\u8981\u6703\u5c0b\u627e\u8a72\u8cc7\u6599\u96c6\u4e2d\u6700\u5408\u9069\u7684\u8cc7\u6599\u524d\u8655\u7406 (data pre-processors)\u3001\u7279\u5fb5\u524d\u8655\u7406 
(feature pre-processors) \u8207\u5206\u985e/\u8ff4\u6b78\u6a21\u578b\u3002\u4ee5\u4e0a\u4e09\u5927\u985e\u5408\u8a08\u5171\u6709 110 \u500b\u8d85\u53c3\u6578\u5fc5\u9808\u900f\u904e\u8c9d\u8449\u65af\u512a\u5316\u4f86\u5c0b\u627e\u6700\u9069\u5408\u7684\u53c3\u6578\u7d44\u5408\u3002\u5176\u8c9d\u8449\u65af\u512a\u5316\u4e3b\u8981\u65b9\u6cd5\u662f\u900f\u904e\u5efa\u7acb\u76ee\u6a19\u51fd\u6578\u7684\u6a5f\u7387\u6a21\u578b\uff0c\u4e26\u7528\u5b83\u4f86\u9078\u64c7\u6700\u6709\u5e0c\u671b\u7684\u8d85\u53c3\u6578\u4f86\u8a55\u4f30\u771f\u5be6\u7684\u76ee\u6a19\u51fd\u6578\u3002 \u4ee5\u4e0b\u5167\u5bb9\u6458\u9304\u81ea Auto-sklearn v1.0 \u8ad6\u6587\u63d0\u4f9b\u7684\u5167\u5bb9 [1][2]","title":"Bayesian Optimization"},{"location":"20.Auto-Sklearn/#data-pre-processors","text":"\u5728\u8cc7\u6599\u524d\u8655\u7406\u90e8\u5206 Auto-sklearn \u63d0\u4f9b\u4e86\u56db\u7a2e\u65b9\u6cd5\u3002\u5305\u542b\u7279\u5fb5\u7e2e\u653e\u3001\u586b\u88dc\u7f3a\u5931\u503c\u3001\u985e\u5225\u7279\u5fb5\u9032\u884c one-hot encoding \u8207\u8655\u7406\u76ee\u6a19\u8f38\u51fa\u985e\u5225\u6578\u91cf\u4e0d\u5e73\u8861\u554f\u984c\u3002 Data Pre-processors \u7279\u5fb5\u7e2e\u653e \u586b\u88dc\u7f3a\u5931\u503c one-hot encoding \u985e\u5225\u8cc7\u6599\u4e0d\u5e73\u8861 \u5728\u65b0\u7684\u7248\u672c\u4e2d\u591a\u4e86\u4e00\u4e9b\u8cc7\u6599\u524d\u8655\u7406\u65b9\u6cd5\uff0c\u8a73\u7d30\u53ef\u4ee5\u53c3\u8003 Auto-sklearn data_preprocessing \u7684\u539f\u59cb\u7a0b\u5f0f\u3002","title":"Data Pre-processors"},{"location":"20.Auto-Sklearn/#feature-pre-processors","text":"\u5728\u7279\u5fb5\u524d\u8655\u7406\u90e8\u5206 Auto-sklearn \u63d0\u4f9b\u4e86 12 \u7a2e\u7279\u5fb5\u8655\u7406\u7684\u6280\u5de7\uff0c\u7136\u800c\u5728\u773e\u591a\u65b9\u6cd5\u4e2d\u50c5\u6703\u6311\u9078\u5176\u4e2d\u4e00\u7a2e\u3002 \u8a73\u7d30\u53ef\u4ee5\u53c3\u8003 Auto-sklearn feature_preprocessing \u7684\u539f\u59cb\u7a0b\u5f0f\u3002","title":"Feature 
Pre-processors"},{"location":"20.Auto-Sklearn/#build-ensemble","text":"\u5728 Auto-sklearn \u8a13\u7df4\u968e\u6bb5\u6703\u7522\u751f\u8a31\u591a\u8868\u73fe\u512a\u826f\u7684\u6a21\u578b\uff0c\u6700\u7d42\u900f\u904e\u8caa\u5a6a\u6cd5\u7684 Bagging Ensemble Selection \u65b9\u6cd5\u4f86\u5408\u4f75\u591a\u500b\u6a21\u578b\u7d44\u5408\u6210\u4e00\u500b\u66f4\u5f37\u66f4\u5927\u7684\u6a21\u578b\uff0c\u4e26\u63d0\u9ad8\u9810\u6e2c\u7684\u6e96\u78ba\u6027\u3002\u4e0b\u5716\u70ba\u7b2c\u4e00\u7248\u8ad6\u6587\u4e2d\u9032\u884c\u7684\u5be6\u9a57\uff0c\u5176\u4e2d\u6a6b\u8ef8\u70ba\u7a0b\u5f0f\u57f7\u884c\u6642\u9593\uff0c\u7e31\u8ef8\u70ba\u5728\u6642\u9593\u5167\u641c\u5c0b\u5230\u7684\u6700\u4f73\u6a21\u578b\u7684\u6392\u540d\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7da0\u8272\u7dda\u689d\u5728\u52a0\u5165\u4e86\u6574\u9ad4\u5b78\u7fd2\u6a5f\u5236\u8868\u73fe\u6548\u679c\u6bd4\u5c1a\u672a\u52a0\u5165\u7684\u85cd\u8272\u7dda\u689d\u5be6\u9a57\u4f86\u5f97\u597d\u3002\u4e26\u4e14\u5728\u77ed\u6642\u9593\u5167\u5c31\u53ef\u4ee5\u5f97\u5230\u4e0d\u932f\u7684\u7d50\u679c\u3002","title":"Build Ensemble"},{"location":"20.Auto-Sklearn/#auto-sklearn_1","text":"\u76ee\u524d Auto-sklearn \u50c5\u652f\u63f4 Linux \u7cfb\u7d71\u3002\u82e5\u6c92\u6709\u6b64\u7cfb\u7d71\u7684\u8b80\u8005\u53ef\u4ee5\u900f\u904e Colab \u9ad4\u9a57\u3002\u53e6\u5916\u82e5\u5b89\u88dd\u904e\u7a0b\u4e2d\u51fa\u73fe\u932f\u8aa4\uff0c\u5fc5\u9808\u5148\u78ba\u8a8d swig \u662f\u5426\u5df2\u5b8c\u6210\u5b89\u88dd\u3002 pip install auto-sklearn \u82e5\u4f7f\u7528 Colab \u57f7\u884c\uff0c\u5b89\u88dd\u5b8c\u6210\u5f8c\u9ede\u9078\u4e0a\u65b9\u5de5\u5177\u5217 Runtime -> Restart runtime \u91cd\u555f\u624d\u80fd\u6b63\u5e38\u57f7\u884c\u6b64\u5957\u4ef6\u3002","title":"\u5b89\u88dd Auto-sklearn"},{"location":"20.Auto-Sklearn/#_3","text":"\u672c\u6b21\u7bc4\u4f8b\u6cbf\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\uff0c\u4e26\u4f7f\u7528 Auto-sklearn 
\u4f86\u641c\u5c0b\u6700\u4f73\u7684\u5206\u985e\u5668\u6a21\u578b\u3002\u6b64\u5916\u5927\u5bb6\u53ef\u4ee5\u8a66\u8457\u89c0\u5bdf Auto-sklearn \u627e\u5230\u7684\u6700\u4f73\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u4e0a\u7684\u8868\u73fe\uff0c\u4e26\u8207\u524d\u5e7e\u5929\u6240\u4ecb\u7d39\u7684\u90a3\u4e9b\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u4f86\u505a\u6bd4\u8f03\u3002 import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris iris = load_iris () df_data = pd . DataFrame ( data = np . c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data","title":"\u8f09\u5165\u8cc7\u6599\u96c6"},{"location":"20.Auto-Sklearn/#_4","text":"\u6211\u5011\u6309\u7167\u82b1\u6735\u7a2e\u985e\u7684\u6578\u91cf\u5c0d\u8cc7\u6599\u96c6\u4ee5 7:3 \u7684\u6bd4\u4f8b\u5207\u5272\u51fa\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u3002\u5176\u4e2d\u53c3\u6578 stratify=y \u8a2d\u5b9a\u662f\u78ba\u4fdd\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u5c0d\u65bc\u4e09\u7a2e\u82b1\u6735\u985e\u5225\u7684\u6bd4\u4f8b\u5728\u9019\u5169\u500b\u5207\u51fa\u4f86\u7684\u8cc7\u6599\u96c6\u4e2d\u6bd4\u4f8b\u8981\u4e00\u6a23\uff0c\u4ee5\u514d\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u6709\u5f88\u5927\u7684\u504f\u5dee\u3002 from sklearn.model_selection import train_test_split X = df_data . drop ( labels = [ 'Species' ], axis = 1 ) . values # \u79fb\u9664Species\u4e26\u53d6\u5f97\u5269\u4e0b\u6b04\u4f4d\u8cc7\u6599 y = df_data [ 'Species' ] . values X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.3 , random_state = 42 , stratify = y ) print ( 'train shape:' , X_train . shape ) print ( 'test shape:' , X_test . 
shape ) \u8f38\u51fa\u7d50\u679c\uff1a train shape: (105, 4) test shape: (45, 4)","title":"\u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6"},{"location":"20.Auto-Sklearn/#auto-sklearn_2","text":"\u4ee5\u4e0b\u662f\u6a21\u578b\u5e38\u7528\u7684\u8d85\u53c3\u6578\u4ee5\u53ca\u65b9\u6cd5\uff0c\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u5b98\u65b9 API \u6587\u4ef6 \u3002 Parameters: - time_left_for_this_task: \u641c\u5c0b\u6642\u9593(\u79d2)\uff0c\u9810\u8a2d3600\u79d2(60\u5206\u9418)\u3002 - per_run_time_limit: \u6bcf\u500b\u6a21\u578b\u8a13\u7df4\u7684\u4e0a\u9650\u6642\u9593\uff0c\u9810\u8a2d\u70batime_left_for_this_task\u76841/10\u3002 - ensemble_size: \u6a21\u578b\u8f38\u51fa\u6578\u91cf\uff0c\u9810\u8a2d50\u3002 - resampling_strategy: \u8cc7\u6599\u63a1\u6a23\u65b9\u5f0f\u3002\u70ba\u4e86\u907f\u514d\u904e\u64ec\u5408\uff0c\u53ef\u4ee5\u63a1\u7528\u4ea4\u53c9\u9a57\u8b49\u6a5f\u5236\u3002\u9810\u8a2d\u65b9\u6cd5\u70ba\u6700\u57fa\u672c\u7684 holdout\u3002 Attributes: - cv_results_: \u67e5\u8a62\u6a21\u578b\u641c\u5c0b\u7d50\u679c\u4ee5\u53ca\u6bcf\u500b\u6700\u4f73\u6a21\u578b\u7684\u8d85\u53c3\u6578\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - refit: \u4f7f\u7528 fit \u5c0b\u627e\u597d\u7684\u53c3\u6578\u5f8c\uff0c\u518d\u4f7f\u7528\u6240\u6709\u7684\u8cc7\u6599\u9032\u884c\u6700\u5f8c\u5fae\u8abf\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 - leaderboard: \u986f\u793a k \u500b ensemble \u6a21\u578b\u4e26\u6392\u540d\u3002 \u9996\u5148\u6211\u5011\u4f86\u6e2c\u8a66\u7b2c\u4e00\u7248\u7684 
Auto-sklearn\uff0c\u5efa\u7acb\u4e00\u500b\u5206\u985e\u5668\u985e\u578b\u7684\u81ea\u52d5\u5316\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u4e26\u8a2d\u5b9a\u76f8\u95dc\u7684\u57f7\u884c\u53c3\u6578\u3002\u5728\u672c\u6b21\u5be6\u9a57\u4e2d\u6211\u5011\u8a2d\u5b9a\u6a21\u578b\u641c\u5c0b\u7e3d\u6642\u9593\u70ba 180 \u79d2\uff0c\u6bcf\u500b\u6a21\u578b\u8a13\u7df4\u6642\u9593\u9650\u5236 40 \u79d2\u5167\u3002\u6b64\u5916\u8a2d\u5b9a resampling_strategy='cv' \u5373 K-Fold \u4ea4\u53c9\u9a57\u8b49\u3002\u6b64\u5916\u5fc5\u9808\u53e6\u5916\u8a2d\u5b9a resampling_strategy_arguments \u4e26\u7d66\u4e88 k=5\uff0c\u8a13\u7df4\u96c6\u5207\u5272\u70ba\u4e94\u7b49\u4efd\u3002\u9019\u610f\u5473\u8457\u76f8\u540c\u7684\u6a21\u578b\u8981\u8a13\u7df4\u4e94\u6b21\uff0c\u6bcf\u4e00\u6b21\u7684\u8a13\u7df4\u90fd\u6703\u5f9e\u9019\u4e94\u7b49\u4efd\u6311\u9078\u5176\u4e2d\u56db\u7b49\u4efd\u4f5c\u70ba\u8a13\u7df4\u8cc7\u6599\uff0c\u5269\u4e0b\u4e00\u7b49\u4efd\u672a\u53c3\u8207\u8a13\u7df4\u4e26\u4f5c\u70ba\u9a57\u8b49\u96c6\u3002 import autosklearn.classification automlclassifierV1 = autosklearn . classification . AutoSklearnClassifier ( time_left_for_this_task = 180 , per_run_time_limit = 40 , resampling_strategy = 'cv' , resampling_strategy_arguments = { 'folds' : 5 } ) automlclassifierV1 . fit ( X_train , y_train ) \u8a13\u7df4\u7d50\u675f\u5f8c\u6211\u5011\u53ef\u4ee5\u4f86\u67e5\u770b\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u8868\u73fe\u3002\u5927\u5bb6\u53ef\u4ee5\u8a66\u8457\u8abf\u6574\u6a21\u578b\u8a13\u7df4\u6642\u9593\u4ee5\u53ca\u4e00\u4e9b\u63a7\u5236\u53c3\u6578\uff0c\u67e5\u770b\u662f\u5426\u6709\u6c92\u6709\u5e6b\u52a9\u6a21\u578b\u6e96\u78ba\u5ea6\u63d0\u5347\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( 'automlclassifierV1 \u8a13\u7df4\u96c6: ' , automlclassifierV1 . score ( X_train , y_train )) print ( 'automlclassifierV1 \u6e2c\u8a66\u96c6: ' , automlclassifierV1 . 
score ( X_test , y_test )) \u8f38\u51fa\u7d50\u679c\uff1a automlclassifierV1 \u8a13\u7df4\u96c6: 0.9904761904761905 automlclassifierV1 \u6e2c\u8a66\u96c6: 0.9111111111111111","title":"Auto-sklearn"},{"location":"20.Auto-Sklearn/#auto-sklearn-20","text":"\u5728\u7b2c\u4e8c\u7248\u7684 Auto-sklearn \u5c0d\u6a21\u578b\u641c\u5c0b\u9032\u884c\u4e86\u4e00\u4e9b\u512a\u5316\uff0c\u4e26\u4e14\u53ef\u4ee5\u81ea\u52d5\u641c\u5c0b\u597d\u7684\u8cc7\u6599\u63a1\u6a23\u65b9\u5f0f\u3002\u56e0\u6b64\u6211\u5011\u4e0d\u7279\u5730\u53bb\u6307\u5b9a resampling_strategy \uff0c\u67e5\u770b\u8868\u73fe\u662f\u5426\u80fd\u5920\u63d0\u5347\u3002 from autosklearn.experimental.askl2 import AutoSklearn2Classifier automlclassifierV2 = AutoSklearn2Classifier ( time_left_for_this_task = 180 , per_run_time_limit = 40 ) automlclassifierV2 . fit ( X_train , y_train ) # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( 'automlclassifierV2 \u8a13\u7df4\u96c6: ' , automlclassifierV2 . score ( X_train , y_train )) print ( 'automlclassifierV2 \u6e2c\u8a66\u96c6: ' , automlclassifierV2 . 
score ( X_test , y_test )) \u57f7\u884c\u7d50\u679c\uff1a automlclassifierV2 \u8a13\u7df4\u96c6: 0.9904761904761905 automlclassifierV2 \u6e2c\u8a66\u96c6: 0.9333333333333333 \u4f7f\u7528\u4e00\u6a23\u7684\u641c\u5c0b\u6642\u9593\u8207\u8a13\u7df4\u9650\u5236\uff0c\u6700\u7d42\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u90fd\u8868\u73fe\u4e0d\u932f\u3002\u5169\u8005\u7684\u6e96\u78ba\u7387\u66f4\u63a5\u8fd1\u4e86\u3002\u9019\u6a23\u7684\u7d50\u679c\u7684\u78ba\u6bd4\u7cfb\u5217\u6559\u5b78\u6240\u4ecb\u7d39\u7684\u4efb\u4e00\u500b\u55ae\u4e00\u6a21\u578b\u9084\u4f86\u5f97\u597d\u3002","title":"\u4f7f\u7528 Auto-sklearn 2.0"},{"location":"20.Auto-Sklearn/#_5","text":"\u6211\u5011\u53ef\u4ee5\u4f7f\u7528\u6a21\u578b\u63d0\u4f9b\u7684\u65b9\u6cd5\u67e5\u770b\u6700\u7d42\u8a13\u7df4\u7d50\u679c\uff0c\u4e26\u67e5\u770b k \u500b Ensemble \u6a21\u578b\u7684\u8a13\u7df4\u7d50\u679c\u4ee5\u53ca\u6bcf\u500b\u6a21\u578b\u7684\u6b0a\u91cd\u3002 automlclassifierV2 . leaderboard ( detailed = True , ensemble_only = True )","title":"\u67e5\u770b\u6bcf\u500b\u6a21\u578b\u7684\u6b0a\u91cd"},{"location":"20.Auto-Sklearn/#_6","text":"\u5982\u679c\u60f3\u5c07 AutoML \u7684\u6a21\u578b\u5132\u5b58\u8d77\u4f86\uff0c\u53ef\u4ee5\u900f\u904e joblib \u5c07\u6a21\u578b\u6253\u5305\u532f\u51fa\u3002 from joblib import dump , load # \u532f\u51fa\u6a21\u578b dump ( automlclassifierV2 , 'model.joblib' ) # \u532f\u5165\u6a21\u578b clf = load ( 'model.joblib' ) # \u6a21\u578b\u9810\u6e2c\u6e2c\u8a66 clf . 
predict ( X_test )","title":"\u8f38\u51fa\u6a21\u578b"},{"location":"20.Auto-Sklearn/#automl","text":"\u9996\u5148\u5b89\u88dd pipelineprofiler \u3002 pip install pipelineprofiler \u900f\u904e PipelineProfiler \u5957\u4ef6\u53ef\u4ee5\u5f88\u5feb\u901f\u5730\u6aa2\u8996\u6a21\u578b\u8a13\u7df4\u7d50\u679c\uff0c\u4ee5\u53ca\u6bcf\u4e00\u500b Ensemble \u6a21\u578b\u7684\u8d85\u53c3\u6578\u4ee5\u53ca\u8cc7\u6599\u524d\u8655\u7406\u65b9\u5f0f\u548c\u7279\u5fb5\u8655\u7406\u65b9\u6cd5\u3002 import PipelineProfiler profiler_data = PipelineProfiler . import_autosklearn ( automlclassifierV2 ) PipelineProfiler . plot_pipeline_matrix ( profiler_data )","title":"\u8996\u89ba\u5316 AutoML \u6a21\u578b"},{"location":"20.Auto-Sklearn/#reference","text":"[1] Feurer, Matthias et al. Efficient and Robust Automated Machine Learning , Advances in neural information processing systems 2015. [2] Feurer, Matthias et al. Supplementary Material for Efficient and Robust Automated Machine Learning , Advances in neural information processing systems 2015. [3] Feurer, Matthias et al. Auto-Sklearn 2.0: Hands-free AutoML via Meta-Learning , arXiv, 2020. [4] Ono, Jorge et al. PipelineProfiler: A Visual Analytics Tool for the Exploration of AutoML Pipelines , arXiv, 2020. 
Auto Machine Learning\u7b46\u8a18- Bayesian Optimization A Quickstart Guide to Auto-Sklearn (AutoML) for Machine Learning Practitioners Auto-Sklearn: Scikit-Learn on Steroids \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"21.Optuna/","text":"[Day 21] \u8abf\u6574\u6a21\u578b\u8d85\u53c3\u6578\u5229\u5668 - Optuna \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 Optuna \u5982\u4f55\u63a1\u6a23\u53c3\u6578\uff1f \u5be6\u4f5c Optuna \u641c\u5c0b\u6700\u4f73\u8d85\u53c3\u6578 \u4ee5 XGBoost \u8ff4\u6b78\u6a21\u578b\u65bc\u623f\u50f9\u9810\u6e2c\u70ba\u4f8b Optuna \u8996\u89ba\u5316\u5206\u6790\u641c\u5c0b\u7d50\u679c \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 \u4f60\u662f\u5426\u66fe\u7d93\u89ba\u5f97\u6a21\u578b\u6709\u592a\u591a\u7684\u8d85\u53c3\u6578\u800c\u611f\u5230\u53ad\u7169\u55ce\uff1f\u8981\u5f9e\u67d0\u4e00\u500b\u6f14\u7b97\u6cd5\u5f97\u5230\u597d\u7684\u89e3\u5fc5\u9808\u8981\u8abf\u6574\u8d85\u53c3\u6578\uff0c\u6240\u8b02\u7684\u8d85\u53c3\u6578\u5c31\u662f\u63a7\u5236\u8a13\u7df4\u6a21\u578b\u7684\u4e00\u7d44\u795e\u79d8\u6578\u5b57\uff0c\u4f8b\u5982\u5b78\u7fd2\u901f\u7387\u5c31\u662f\u4e00\u7a2e\u8d85\u53c3\u6578\u3002\u4f60\u6c38\u9060\u90fd\u4e0d\u77e5\u9053 0~1 \u4e4b\u9593\u54ea\u4e00\u500b\u6578\u5b57\u662f\u6700\u9069\u5408\u7684\uff0c\u552f\u4e00\u7684\u65b9\u6cd5\u5c31\u662f\u8a66\u932f (trial and error)\u3002\u90a3\u842c\u4e00\u6a21\u578b\u6709\u591a\u500b\u8d85\u53c3\u6578\u53ef\u4ee5\u63a7\u5236\uff0c\u8c48\u4e0d\u662f\u5c31\u6709\u6210\u5343\u4e0a\u842c\u7a2e\u7d44\u5408\u8981\u6162\u6162\u5617\u8a66\u55ce\uff1f\u5982\u679c\u4f60\u6709\u4e5f\u9019\u500b\u554f\u984c\uff0c\u770b\u9019\u7bc7\u5c31\u5c0d\u4e86\uff01\u96d6\u7136\u4f60\u53ef\u80fd\u807d\u904e Sklearn \u7684 GridSearchCV \u540c\u6a23\u4e5f\u662f\u66b4\u529b\u7684\u627e\u51fa\u6700\u4f73\u53c3\u6578\uff0c\u6216\u662f\u4f7f\u7528 RandomizedSearchCV 
\u6307\u5b9a\u8d85\u53c3\u6578\u7684\u7bc4\u570d\u4e26\u96a8\u6a5f\u7684\u62bd\u53d6\u53c3\u6578\u9032\u884c\u8a13\u7df4\uff0c\u5176\u5b83\u5011\u7684\u5171\u540c\u7f3a\u9ede\u662f\u975e\u5e38\u8017\u6642\u8207\u4f54\u7528\u6a5f\u5668\u8cc7\u6e90\u3002\u9019\u88e1\u6211\u5011\u8981\u4f86\u4ecb\u7d39 Optuna \u9019\u500b\u81ea\u52d5\u627e\u8d85\u53c3\u6578\u7684\u65b9\u4fbf\u5de5\u5177\uff0c\u4e26\u4e14\u53ef\u4ee5\u548c\u591a\u500b\u5e38\u7528\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u6574\u5408\u3002Optuna \u900f\u904e\u8abf\u6574\u9069\u7576\u7684\u8d85\u53c3\u6578\u4f86\u63d0\u9ad8\u6a21\u578b\u9810\u6e2c\u80fd\u529b\uff0c\u6b64\u5c08\u6848\u6700\u521d\u65bc 2019 \u767c\u8868\u65bc arxiv \u7684\u4e00\u7bc7\u8ad6\u6587 Optuna: A Next-generation Hyperparameter Optimization Framework \u540c\u6642\u958b\u6e90\u5728 GitHub \u4e0a\u514d\u8cbb\u63d0\u4f9b\u5927\u5bb6\u4f7f\u7528\u3002\u540c\u6642 Optuna \u4e5f\u662f 2021 \u5e74 Kaggle \u8cc7\u6599\u79d1\u5b78\u7af6\u8cfd\u4e2d\u6700\u5e38\u898b\u7684\u6a21\u578b\u8abf\u53c3\u5de5\u5177\u3002\u90a3\u662f\u4ec0\u9ebc\u539f\u56e0\u8b93 Optuna \u53d7\u5230\u5ee3\u5927\u7684\u6a5f\u5668\u5b78\u7fd2\u793e\u7fa4\u5982\u6b64\u7684\u6b61\u8fce\u5462\uff1f\u5c31\u8b93\u6211\u5011\u4f86\u770b\u770b\u4ed6\u662f\u5982\u6b64\u5730\u5f37\u5927\u5427\uff01 \u95dc\u65bc Optuna Optuna \u662f\u4e00\u500b\u5c08\u70ba\u6a5f\u5668\u5b78\u7fd2\u8a2d\u8a08\u7684\u81ea\u52d5\u8d85\u53c3\u6578\u512a\u5316\u7684\u6846\u67b6\u3002\u5176\u6700\u7a81\u51fa\u7684\u7279\u9ede\u662f\uff1a \u4eba\u6027\u5316\u7684\u5b9a\u7fa9\u641c\u7d22\u7a7a\u9593\u3002 \u652f\u63f4\u5927\u591a\u6578 ML \u8207 DL \u7684\u5b78\u7fd2\u5957\u4ef6\u3002\u4f8b\u5982: Sklearn\u3001PyTorch\u3001TensorFlow, XGBoost\u3001LightGBM\u3001 CatBoost...\u7b49\u3002 \u5c0d\u641c\u7d22\u7d50\u679c\u63d0\u4f9b\u53ef\u89e3\u91cb\u6027(XAI)\u3002 \u5132\u5b58\u6b77\u53f2\u6700\u4f73\u7684\u53c3\u6578\u5be6\u73fe\u5e73\u884c\u512a\u5316\u5de5\u4f5c\u3002 
\u6c7a\u5b9a\u4e26\u7d42\u6b62\u4e0d\u6eff\u8db3\u9810\u5b9a\u7fa9\u689d\u4ef6\u7684\u8a66\u9a57\u3002 Optuna \u7c21\u55ae\u7bc4\u4f8b \u9019\u88e1\u6211\u5011\u8a2d\u5b9a\u4e00\u500b\u7c21\u55ae\u7684\u76ee\u6a19\u51fd\u5f0f $(x1+2)^2 + (x2-4)^2$\u3002\u6211\u5011\u90fd\u77e5\u9053\u7576\u9019\u500b\u5f0f\u5b50 x1=-2, x2=4 \u6642\u5c07\u6703\u6709\u6975\u5c0f\u503c 0\u3002\u56e0\u6b64\u6211\u5011\u5c31\u7528\u9019\u500b\u7c21\u55ae\u7684\u4f8b\u5b50\u900f\u904e Optuna \u627e\u51fa\u9019\u500b\u51fd\u5f0f\u4e2d\u6975\u5c0f\u503c\u6240\u5c0d\u61c9\u7684 x1 \u8207 x2 \u5427\u3002 import optuna def objective ( trial ): x1 = trial . suggest_float ( \"x1\" , - 5 , 5 ) x2 = trial . suggest_float ( \"x2\" , - 5 , 5 ) return ( x1 + 2 ) ** 2 + ( x2 - 4 ) ** 2 \u9996\u5148\u8f09\u5165 optuna \u5957\u4ef6\uff0c\u5982\u679c\u5c1a\u672a\u5b89\u88dd\u6b64\u5957\u4ef6\u7684\u7684\u8b80\u8005\u53ef\u4ee5\u53c3\u8003\u4ee5\u4e0b\u6307\u4ee4\u9032\u884c\u5b89\u88dd\uff1a pip install optuna \u63a5\u8457\u6211\u5011\u4f86\u5b9a\u7fa9\u4e00\u500b\u627e\u51fa\u6975\u5c0f\u503c\u7684\u76ee\u6a19\u51fd\u5f0f objective() \u3002\u5728\u9019\u500b\u51fd\u5f0f\u4e2d\u6211\u5011\u5c07\u8981\u8a2d\u5b9a optuna \u53ef\u4ee5\u53bb\u5c0b\u627e\u7684\u4e00\u53c3\u6578\uff0c\u4e5f\u5c31\u662f x1 \u8207 x2\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e optuna \u6240\u63d0\u4f9b\u7684 trial \u7269\u4ef6\u4f86\u70ba\u6211\u5011\u7684\u8d85\u53c3\u6578\u8a2d\u5b9a\u4e00\u7d44\u7bc4\u570d\u3002\u5176\u4e2d\u5b83\u6709\u4e00\u500b suggest_float \u65b9\u6cd5\uff0c\u8a72\u65b9\u6cd5\u63a1\u7528\u8d85\u53c3\u6578\u7684\u540d\u7a31\u548c\u7bc4\u570d\u4f86\u5c0b\u627e\u5176\u6700\u4f73\u503c\u3002\u6211\u5011\u4ee5 x1 \u4f86\u8209\u4f8b\uff1a x1 = trial.suggest_float(\"x1\", -5, 5) \u4e0a\u9762\u9019\u4e00\u6bb5\u7a0b\u5f0f\u5728 GridSearch \u4e2d\u53ef\u4ee5\u8868\u793a\u6210 {\"x1\": np.arange(-5, 5, .1)} \u3002\u5373\u8868\u793a\u641c\u5c0b\u904e\u7a0b\u4e2d\u6211\u5011\u6703\u5f9e x1 \u96a8\u6a5f\u8a2d\u5b9a 
-5~5 \u4e4b\u9593\u7684\u4efb\u4e00\u6d6e\u9ede\u6578\u3002\u8a2d\u5b9a\u5b8c\u51fd\u5f0f\u5f8c\u5c31\u53ef\u4ee5\u958b\u59cb\u512a\u5316\u4e86\uff0c\u6211\u5011\u5f9e optuna \u5efa\u7acb\u4e00\u500b study \u7269\u4ef6\uff0c\u4e26\u5c07 objective \u51fd\u6578\u50b3\u905e\u7d66 study \u7684 optimize \u65b9\u6cd5\u3002\u7531\u65bc\u6211\u5011\u7684\u76ee\u6a19\u662f\u8981\u627e\u51fa\u51fd\u5f0f\u4e2d\u7684\u6975\u5c0f\u503c\uff0c\u56e0\u6b64 direction \u8a2d\u70ba minimize \u3002\u53e6\u5916\u5728 optimize \u65b9\u6cd5\u4e2d\u6211\u5011\u4e5f\u53ef\u4ee5\u8a2d\u5b9a\u8a66\u9a57\u7684\u6b21\u6578(n_trials)\u6216\u6642\u9593(timeout)\u3002\u4e00\u5207\u5c31\u7dd2\u5f8c\u5373\u53ef\u958b\u59cb\u57f7\u884c\uff01\u4ee5\u4e0b\u7bc4\u4f8b\u662f\u8fed\u4ee350\u6b21\u4e26\u5f9e\u4e2d\u627e\u5230\u4e00\u7d44\u6700\u4f73\u7684 x1 \u8207 x2 \u4f7f\u5176\u76ee\u6a19\u51fd\u5f0f\u53ef\u4ee5\u6700\u5c0f\u5316\u3002\u8dd1\u5b8c 50 \u6b21\u5f8c\u6211\u5011\u53ef\u4ee5\u7d93\u7531 study \u8b8a\u6578\u4e2d\u5f97\u5230\u4e00\u7d44\u6700\u4f73\u7684\u89e3\u3002\u8a66\u9a57\u7d50\u675f\u5f8c\u6211\u5011\u53ef\u4ee5\u767c\u73fe x1 \u8da8\u8fd1\u65bc -2 \u548c x2 \u8da8\u8fd1\u65bc 4\u3002 %% time # Creating Optuna object and defining its parameters study = optuna . create_study ( direction = 'minimize' ) study . optimize ( objective , n_trials = 50 ) # Showing optimization results print ( 'Number of finished trials:' , len ( study . trials )) print ( 'Best trial parameters:' , study . best_trial . params ) print ( 'Best score:' , study . best_value ) \u8f38\u51fa\u7d50\u679c\uff1a Number of finished trials: 50 Best trial parameters: {'x1': -1.8154924755761588, 'x2': 3.9141985823539844} Best score: 0.04140490983908035 CPU times: user 432 ms, sys: 46.3 ms, total: 478 ms Wall time: 431 ms \u7531\u4e0a\u8ff0\u7684\u7c21\u55ae\u4f8b\u5b50\u6211\u5011\u53ef\u4ee5\u77e5\u9053\u5efa\u7acb\u4e00\u500b optuna \u6700\u4f73\u5316\u6d41\u7a0b\u50c5\u9700\u8981\u4e09\u6b65\u9a5f\uff1a 1. 
\u5efa\u7acb objective \u51fd\u5f0f\u8207\u8a2d\u5b9a trial\uff0c\u4e26\u56de\u50b3 loss\u3002 2. \u5efa\u7acb create_study() \u7269\u4ef6\u3002 3. \u4f7f\u7528 optimize() \u57f7\u884c\u641c\u5c0b\u3002 End-to-end example with XGBoost \u6211\u5011\u4ee5 Sklearn \u6240\u63d0\u4f9b\u7684\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6\u4f86\u505a\u7bc4\u4f8b\u3002\u6b64\u8cc7\u6599\u96c6\u5171\u6709 506 \u7b46\u8cc7\u6599\uff0c\u5176\u4e2d\u8f38\u5165\u7279\u5fb5\u6709 13 \u500b\u5176\u8f38\u51fa\u70ba\u9810\u6e2c\u8a72\u7b46\u8cc7\u6599\u7684\u623f\u50f9\u3002\u7531\u65bc\u60f3\u8981\u5feb\u901f\u793a\u7bc4\u5982\u4f55\u4f7f\u7528 optuna\uff0c\u56e0\u6b64\u9019\u88e1\u5c31\u4e0d\u505a\u4efb\u4f55\u8cc7\u6599 EDA \u8207\u524d\u8655\u7406\u3002 from sklearn.datasets import load_boston X , y = load_boston ( return_X_y = True ) print ( 'X:' , X . shape ) print ( 'y:' , y . shape ) \u8f38\u51fa\u7d50\u679c\uff1a X: (506, 13) y: (506,) \u8cc7\u6599\u96c6\u6210\u529f\u88ab\u8f09\u5165\u5f8c\u6211\u5011\u5c31\u53ef\u4ee5\u5efa\u7acb\u4e00\u500b objective \u51fd\u5f0f\u3002\u5728\u9019\u500b\u76ee\u6a19\u51fd\u5f0f\u4e2d\uff0c\u6211\u5011\u5efa\u7acb\u4e86\u4e00\u500b\u5c0f\u7bc4\u570d\u7684 XGBoost \u8d85\u53c3\u6578\u641c\u7d22\u7a7a\u9593\u3002\u5176\u6bcf\u4e00\u500b\u8d85\u53c3\u6578\u90fd\u6703\u6709\u4e00\u500b\u641c\u7d22\u7684\u7bc4\u570d\uff0c\u53ef\u4ee5\u4f7f\u7528 suggest_* \u65b9\u6cd5\u8a2d\u5b9a\u5340\u9593\u3002\u6b64\u65b9\u6cd5\u5fc5\u9808\u8f38\u5165\u8d85\u53c3\u6578\u7684\u540d\u7a31\uff0c\u4ee5\u53ca\u7d66\u4e88\u8a72\u53c3\u6578\u7684\u4e00\u7d44\u96a8\u6a5f\u7bc4\u570d\u5176\u578b\u614b\u6709\u5f88\u591a\u4f8b\u5982\uff1a suggest_int \u3001 suggest_discrete_uniform \u3001 suggest_float ...\u7b49\u3002\u66f4\u591a\u8a73\u7d30\u7684\u5167\u5bb9\u53ef\u4ee5\u5f9e \u5b98\u65b9\u6587\u4ef6 \u53d6\u5f97\u3002\u6216\u662f\u4e5f\u53ef\u4ee5\u53c3\u8003\u5b98\u65b9\u5728 GitHub \u4e0a\u5c0d\u65bc XGBoost \u7684\u4f7f\u7528\u7bc4\u4f8b\u3002 import optuna 
import xgboost as xgb from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split def objective ( trial , X = X , y = y ): \"\"\" A function to train a model using different hyperparamerters combinations provided by Optuna. \"\"\" X_train , X_valid , y_train , y_valid = train_test_split ( X , y , test_size = 0.4 ) params = { 'max_depth' : trial . suggest_int ( 'max_depth' , 6 , 15 ), \"subsample\" : trial . suggest_float ( \"subsample\" , 0.2 , 1.0 ), 'n_estimators' : trial . suggest_int ( 'n_estimators' , 500 , 2000 , 100 ), 'eta' : trial . suggest_float ( \"eta\" , 1e-8 , 1.0 , log = True ), 'alpha' : trial . suggest_float ( 'alpha' , 1e-8 , 1.0 , log = True ), 'lambda' : trial . suggest_float ( 'lambda' , 1e-8 , 1.0 , log = True ), 'gamma' : trial . suggest_float ( \"gamma\" , 1e-8 , 1.0 , log = True ), 'min_child_weight' : trial . suggest_int ( 'min_child_weight' , 2 , 10 ), 'grow_policy' : trial . suggest_categorical ( \"grow_policy\" , [ \"depthwise\" , \"lossguide\" ]), \"colsample_bytree\" : trial . suggest_float ( \"colsample_bytree\" , 0.2 , 1.0 ) } reg = xgb . XGBRegressor ( ** params ) reg . fit ( X_train , y_train , eval_set = [( X_valid , y_valid )], eval_metric = 'rmse' , verbose = False ) return mean_squared_error ( y_valid , reg . predict ( X_valid ), squared = False ) \u8a2d\u5b9a\u597d\u8abf\u53c3\u7684\u5340\u9593\u5f8c\uff0c\u5373\u53ef\u958b\u59cb\u56c9\u3002 %% time # Creating Optuna object and defining its parameters study = optuna . create_study ( direction = 'minimize' ) study . optimize ( objective , n_trials = 10 ) # Showing optimization results print ( 'Number of finished trials:' , len ( study . trials )) print ( 'Best trial parameters:' , study . best_trial . params ) print ( 'Best score:' , study . 
best_value ) Optuna \u9810\u8a2d\u7684\u8d85\u53c3\u6578\u641c\u5c0b\u65b9\u6cd5\u80fd\u6709\u6548\u5730\u5728\u77ed\u6642\u9593\u5167\u5f80\u6700\u4f73\u7684\u65b9\u5411\u53bb\u5c0b\u627e\u4e00\u7d44\u9069\u5408\u7684\u53c3\u6578\u3002\u8207 GridSearch \u76f8\u6bd4\u539f\u672c\u53ef\u80fd\u9700\u8981\u6578\u5c0f\u6642\u7684\u641c\u7d22\u7a7a\u9593\u5728\u77ed\u77ed\u7684\u5e7e\u5206\u9418\u5167\u5c31\u53ef\u4ee5\u7372\u5f97\u4e0d\u932f\u7684\u7d93\u679c\u3002\u4e26\u4e14\u6709\u6548\u7684\u964d\u4f4e loss\u3002\u9664\u4e86\u8ff4\u6b78\u554f\u984c Optuna \u4e5f\u80fd\u5c0d\u5206\u985e\u554f\u984c\u9032\u884c\u8d85\u53c3\u6578\u641c\u5c0b\uff0c\u5b98\u65b9\u7684 GitHub \u4e5f\u6709\u63d0\u4f9b\u5404\u7a2e\u4e0d\u540c\u6a5f\u5668\u5b78\u7fd2\u6846\u67b6\u7684\u5beb\u6cd5\u3002 Optuna \u5982\u4f55\u63a1\u6a23\u53c3\u6578\uff1f TPESampler \u70ba\u9810\u8a2d\u7684\u8d85\u53c3\u6578\u63a1\u6a23\u5668\u3002\u5b83\u8a66\u5716\u900f\u904e\u63d0\u9ad8\u6700\u5f8c\u4e00\u6b21\u8a66\u9a57\u7684\u5206\u6578\u4f86\u5c0d\u8d85\u53c3\u6578\u5019\u9078\u8005\u9032\u884c\u63a1\u6a23\u3002\u9664\u6b64\u4e4b\u5916 Optuna \u63d0\u4f9b\u4e86\u4ee5\u4e0b\u9019\u5e7e\u500b\u53c3\u6578\u63a1\u6a23\u7684\u65b9\u5f0f: - GridSampler : \u8207 Sklearn \u7684 GridSearch \u63a1\u6a23\u65b9\u5f0f\u76f8\u540c\u3002\u4f7f\u7528\u6b64\u65b9\u6cd5\u6642\u5efa\u8b70\u4e0d\u8981\u8a2d\u5b9a\u592a\u5927\u7684\u7bc4\u570d\u3002 - RandomSampler : \u8207 Sklearn \u7684 RandomizedGridSearch \u63a1\u6a23\u65b9\u5f0f\u76f8\u540c\u3002 - TPESampler : \u5168\u540d Tree-structured Parzen Estimator sampler\u3002\u9810\u8a2d\u63a1\u6a23\u65b9\u5f0f\u3002 - CmaEsSampler : \u57fa\u65bc CMA ES \u6f14\u7b97\u7b97\u6cd5\u7684\u63a1\u6a23\u5668 (\u4e0d\u652f\u63f4\u985e\u5225\u578b\u7684\u8d85\u53c3\u6578). 
\u5982\u679c\u9700\u8981\u66ff\u63db\u63a1\u6a23\u53c3\u6578\u7684\u65b9\u5f0f\u53ef\u4ee5\u53c3\u8003\u4ee5\u4e0b\u7a0b\u5f0f\u3002 from optuna.samplers import CmaEsSampler , RandomSampler # Study with a random sampler study_1 = optuna . create_study ( sampler = RandomSampler ( seed = 42 )) # Study with a CMA ES sampler study_2 = optuna . create_study ( sampler = CmaEsSampler ( seed = 42 )) Optuna \u8996\u89ba\u5316\u5206\u6790 Optuna \u5728\u540c\u6642\u4e5f\u63d0\u4f9b\u4e86\u8996\u89ba\u5316\u7684\u5957\u4ef6: - plot_optimization_history (\u8996\u89ba\u5316\u512a\u5316\u7684\u904e\u7a0b) - plot_intermediate_values (\u8996\u89ba\u5316\u5b78\u7fd2\u7684\u66f2\u7dda) - plot_parallel_coordinate (\u8996\u89ba\u5316\u9ad8\u7dad\u5ea6\u4e2d\u53c3\u6578\u9593\u7684\u5f7c\u6b64\u95dc\u4fc2) - plot_contour (\u8996\u89ba\u5316\u53c3\u6578\u9593\u7684\u5f7c\u6b64\u95dc\u4fc2) - plot_slice (\u8996\u89ba\u5316\u500b\u5225\u53c3\u6578) - plot_param_importances (\u53c3\u6578\u5c0d\u6a21\u578b\u7684\u91cd\u8981\u7a0b\u5ea6) - plot_edf (\u8996\u89ba\u5316\u9a57\u5206\u4f48\u51fd\u6578) \u5ef6\u7e8c\u4e0a\u9762\u7684\u7bc4\u4f8b\u6211\u5011\u4f86\u8996\u89ba\u5316\u5c55\u793a Optuna \u641c\u5c0b\u7684\u904e\u7a0b\u8207\u7d50\u679c\u3002\u9996\u5148\u6211\u5011\u4f86\u7e6a\u88fd study \u7684\u512a\u5316\u6b77\u53f2\u904e\u7a0b\u3002\u9019\u5f35\u5716\u544a\u8a34\u6211\u5011\uff0cOptuna \u53ea\u7d93\u904e\u5e7e\u6b21\u8a66\u9a57\u5c31\u4f7f\u5206\u6578\u6536\u6582\u5230\u6700\u5c0f\u503c\u3002 from optuna.visualization import plot_optimization_history plotly_config = { \"staticPlot\" : True } fig = plot_optimization_history ( study ) fig . 
show ( config = plotly_config ) \u63a5\u4e0b\u4f86\uff0c\u8b93\u6211\u5011\u7e6a\u88fd\u8d85\u53c3\u6578\u91cd\u8981\u6027\u3002\u5f9e\u9019\u5f35\u5716\u6211\u5011\u53ef\u4ee5\u767c\u73fe eta(learning_rate) \u5b78\u7fd2\u901f\u7387\u662f\u6700\u70ba\u91cd\u8981\u7684\u3002\u6b64\u5916 grow_policy \u8207 lambda \u5c0d\u6e1b\u5c11 loss \u4e0a\u7121\u592a\u5927\u5e6b\u52a9\u3002\u56e0\u6b64\u5728\u4e0b\u4e00\u6b21\u57f7\u884c\u8a66\u9a57\u7684\u6642\u5019\u53ef\u4ee5\u8003\u616e\u5c07\u7121\u7528\u7684\u53c3\u6578\u79fb\u9664\uff0c\u4e26\u5c07\u91cd\u8981\u7684\u8d85\u53c3\u6578\u7bc4\u570d\u52a0\u5927\u53d6\u5f97\u66f4\u597d\u7684\u641c\u7d22\u7d50\u679c\u3002\u5176\u4ed6\u7684\u4f7f\u7528\u65b9\u6cd5\u53ef\u4ee5 \u53c3\u8003 \u5b98\u65b9\u7684\u8aaa\u660e\u6587\u4ef6\u3002 from optuna.visualization import plot_param_importances fig = plot_param_importances ( study ) fig . show ( config = plotly_config ) \u5c0f\u7d50 \u4eca\u5929\u6211\u5011\u4ecb\u7d39\u4e86\u9019\u4e00\u500b\u8d85\u53c3\u6578\u6700\u4f73\u5316\u7684\u5de5\u5177\uff0c\u88e1\u9762\u6709\u592a\u591a\u529f\u80fd\u5c1a\u672a\u63d0\u5230\u3002\u4f8b\u5982\uff1a\u8a66\u9a57\u7684\u526a\u679d\uff0c\u7c21\u55ae\u4f86\u8aaa\u5c31\u662f\u8a2d\u5b9a\u8a66\u9a57\u7684\u4f8b\u5916\u689d\u4ef6\u7576\u4e0d\u6eff\u8db3\u9810\u5b9a\u689d\u4ef6\u5373\u4e0d\u57f7\u884c\u6b64\u6b21\u8a66\u9a57\u3002\u6216\u662f\u5132\u5b58\u6b77\u53f2\u6700\u4f73\u7684\u53c3\u6578\u5be6\u73fe\u5e73\u884c\u512a\u5316\u5de5\u4f5c\u3002\u9664\u6b64\u4e4b\u5916\u6b64\u5957\u4ef6\u9084\u652f\u63f4\u50cf\u662f SQLite \u7b49\u8cc7\u6599\u5eab\u53ef\u4ee5\u5132\u5b58\u6b77\u53f2\u641c\u5c0b\u7d50\u679c\u5feb\u901f\u7684\u9054\u5230\u6700\u4f73\u641c\u5c0b\u80fd\u529b\u3002\u91cd\u9ede\u6b64\u5957\u4ef6\u9084\u652f\u63f4\u795e\u7d93\u7db2\u8def\u7684\u53c3\u6578\u641c\u5c0b\u4ee5\u53ca\u7db2\u8def\u7684\u5bec\u5ea6\u6df1\u5ea6\u9078\u64c7\u3002\u5e38\u898b\u7684\u6df1\u5ea6\u5b78\u7fd2\u6846\u67b6\u90fd\u80fd\u652f\u63f4\u4f8b\u5982 
TensorFlow\u3001PyTorch\uff0cMXNet...\u7b49\u3002 Reference OPTUNA: A Flexible, Efficient and Scalable Hyperparameter Optimization Framework optuna.org \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 21] \u8abf\u6574\u6a21\u578b\u8d85\u53c3\u6578\u5229\u5668 - Optuna"},{"location":"21.Optuna/#day-21-optuna","text":"","title":"[Day 21] \u8abf\u6574\u6a21\u578b\u8d85\u53c3\u6578\u5229\u5668 - Optuna"},{"location":"21.Optuna/#_1","text":"Optuna \u5982\u4f55\u63a1\u6a23\u53c3\u6578\uff1f \u5be6\u4f5c Optuna \u641c\u5c0b\u6700\u4f73\u8d85\u53c3\u6578 \u4ee5 XGBoost \u8ff4\u6b78\u6a21\u578b\u65bc\u623f\u50f9\u9810\u6e2c\u70ba\u4f8b Optuna \u8996\u89ba\u5316\u5206\u6790\u641c\u5c0b\u7d50\u679c \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"21.Optuna/#_2","text":"\u4f60\u662f\u5426\u66fe\u7d93\u89ba\u5f97\u6a21\u578b\u6709\u592a\u591a\u7684\u8d85\u53c3\u6578\u800c\u611f\u5230\u53ad\u7169\u55ce\uff1f\u8981\u5f9e\u67d0\u4e00\u500b\u6f14\u7b97\u6cd5\u5f97\u5230\u597d\u7684\u89e3\u5fc5\u9808\u8981\u8abf\u6574\u8d85\u53c3\u6578\uff0c\u6240\u8b02\u7684\u8d85\u53c3\u6578\u5c31\u662f\u63a7\u5236\u8a13\u7df4\u6a21\u578b\u7684\u4e00\u7d44\u795e\u79d8\u6578\u5b57\uff0c\u4f8b\u5982\u5b78\u7fd2\u901f\u7387\u5c31\u662f\u4e00\u7a2e\u8d85\u53c3\u6578\u3002\u4f60\u6c38\u9060\u90fd\u4e0d\u77e5\u9053 0~1 \u4e4b\u9593\u54ea\u4e00\u500b\u6578\u5b57\u662f\u6700\u9069\u5408\u7684\uff0c\u552f\u4e00\u7684\u65b9\u6cd5\u5c31\u662f\u8a66\u932f (trial and error)\u3002\u90a3\u842c\u4e00\u6a21\u578b\u6709\u591a\u500b\u8d85\u53c3\u6578\u53ef\u4ee5\u63a7\u5236\uff0c\u8c48\u4e0d\u662f\u5c31\u6709\u6210\u5343\u4e0a\u842c\u7a2e\u7d44\u5408\u8981\u6162\u6162\u5617\u8a66\u55ce\uff1f\u5982\u679c\u4f60\u6709\u4e5f\u9019\u500b\u554f\u984c\uff0c\u770b\u9019\u7bc7\u5c31\u5c0d\u4e86\uff01\u96d6\u7136\u4f60\u53ef\u80fd\u807d\u904e Sklearn \u7684 GridSearchCV 
\u540c\u6a23\u4e5f\u662f\u66b4\u529b\u7684\u627e\u51fa\u6700\u4f73\u53c3\u6578\uff0c\u6216\u662f\u4f7f\u7528 RandomizedSearchCV \u6307\u5b9a\u8d85\u53c3\u6578\u7684\u7bc4\u570d\u4e26\u96a8\u6a5f\u7684\u62bd\u53d6\u53c3\u6578\u9032\u2f8f\u8a13\u7df4\uff0c\u5176\u5b83\u5011\u7684\u5171\u540c\u7f3a\u9ede\u662f\u975e\u5e38\u8017\u6642\u8207\u4f54\u7528\u6a5f\u5668\u8cc7\u6e90\u3002\u9019\u88e1\u6211\u5011\u8981\u4f86\u4ecb\u7d39 Optuna \u9019\u500b\u81ea\u52d5\u627e\u8d85\u53c3\u6578\u7684\u65b9\u4fbf\u5de5\u5177\uff0c\u4e26\u4e14\u53ef\u4ee5\u548c\u591a\u500b\u5e38\u7528\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u6574\u5408\u3002Optuna \u900f\u904e\u8abf\u6574\u9069\u7576\u7684\u8d85\u53c3\u6578\u4f86\u63d0\u9ad8\u6a21\u578b\u9810\u6e2c\u80fd\u529b\uff0c\u6b64\u5c08\u6848\u6700\u521d\u65bc 2019 \u767c\u8868\u65bc arxiv \u7684\u4e00\u7bc7\u8ad6\u6587 Optuna: A Next-generation Hyperparameter Optimization Framework \u540c\u6642\u958b\u6e90\u5728 GitHub \u4e0a\u514d\u8cbb\u63d0\u4f9b\u5927\u5bb6\u4f7f\u7528\u3002\u540c\u6642 Optuna \u4e5f\u662f 2021 \u5e74 Kaggle \u8cc7\u6599\u79d1\u5b78\u7af6\u8cfd\u4e2d\u6700\u5e38\u898b\u7684\u6a21\u578b\u8abf\u53c3\u5de5\u5177\u3002\u90a3\u662f\u4ec0\u9ebc\u539f\u56e0\u8b93 Optuna \u53d7\u5230\u5ee3\u5927\u7684\u6a5f\u5668\u5b78\u7fd2\u793e\u7fa4\u5982\u6b64\u7684\u6b61\u8fce\u5462\uff1f\u5c31\u8b93\u6211\u5011\u4f86\u770b\u770b\u4ed6\u662f\u5982\u6b64\u5730\u5f37\u5927\u5427\uff01","title":"\u524d\u8a00"},{"location":"21.Optuna/#optuna","text":"Optuna \u662f\u4e00\u500b\u5c08\u70ba\u6a5f\u5668\u5b78\u7fd2\u8a2d\u8a08\u7684\u81ea\u52d5\u8d85\u53c3\u6578\u512a\u5316\u7684\u6846\u67b6\u3002\u5176\u6700\u7a81\u51fa\u7684\u7279\u9ede\u662f\uff1a \u4eba\u6027\u5316\u7684\u5b9a\u7fa9\u641c\u7d22\u7a7a\u9593\u3002 \u652f\u63f4\u5927\u591a\u6578 ML \u8207 DL \u7684\u5b78\u7fd2\u5957\u4ef6\u3002\u4f8b\u5982: Sklearn\u3001PyTorch\u3001TensorFlow, XGBoost\u3001LightGBM\u3001 CatBoost...\u7b49\u3002 
\u5c0d\u5c0d\u641c\u7d22\u7d50\u679c\u63d0\u4f9b\u53ef\u89e3\u91cb\u6027(XAI)\u3002 \u5132\u5b58\u6b77\u53f2\u6700\u4f73\u7684\u53c3\u6578\u5be6\u73fe\u5e73\u884c\u512a\u5316\u5de5\u4f5c\u3002 \u6c7a\u5b9a\u4e26\u7d42\u6b62\u4e0d\u6eff\u8db3\u9810\u5b9a\u7fa9\u689d\u4ef6\u7684\u8a66\u9a57\u3002","title":"\u95dc\u65bc Optuna"},{"location":"21.Optuna/#optuna_1","text":"\u9019\u88e1\u6211\u5011\u8a2d\u5b9a\u4e00\u500b\u7c21\u55ae\u7684\u76ee\u6a19\u51fd\u5f0f $(x1+2)^2 + (x2-4)^2$\u3002\u6211\u5011\u90fd\u77e5\u9053\u7576\u9019\u500b\u5f0f\u5b50 x1=-2, x2=4 \u6642\u5c07\u6703\u6709\u6975\u5c0f\u503c 0\u3002\u56e0\u6b64\u6211\u5011\u5c31\u7528\u9019\u500b\u7c21\u55ae\u7684\u4f8b\u5b50\u900f\u904e Optuna \u627e\u51fa\u9019\u500b\u51fd\u5f0f\u4e2d\u6975\u5c0f\u503c\u6240\u5c0d\u61c9\u7684 x1 \u8207 x2 \u5427\u3002 import optuna def objective ( trial ): x1 = trial . suggest_float ( \"x1\" , - 5 , 5 ) x2 = trial . suggest_float ( \"x2\" , - 5 , 5 ) return ( x1 + 2 ) ** 2 + ( x2 - 4 ) ** 2 \u9996\u5148\u8f09\u5165 optuna \u5957\u4ef6\uff0c\u5982\u679c\u5c1a\u672a\u5b89\u88dd\u6b64\u5957\u4ef6\u7684\u7684\u8b80\u8005\u53ef\u4ee5\u53c3\u8003\u4ee5\u4e0b\u6307\u4ee4\u9032\u884c\u5b89\u88dd\uff1a pip install optuna \u63a5\u8457\u6211\u5011\u4f86\u5b9a\u7fa9\u4e00\u500b\u627e\u51fa\u6975\u5c0f\u503c\u7684\u76ee\u6a19\u51fd\u5f0f objective() \u3002\u5728\u9019\u500b\u51fd\u5f0f\u4e2d\u6211\u5011\u5c07\u8981\u8a2d\u5b9a optuna \u53ef\u4ee5\u53bb\u5c0b\u627e\u7684\u4e00\u53c3\u6578\uff0c\u4e5f\u5c31\u662f x1 \u8207 x2\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e optuna \u6240\u63d0\u4f9b\u7684 trial \u7269\u4ef6\u4f86\u70ba\u6211\u5011\u7684\u8d85\u53c3\u6578\u8a2d\u5b9a\u4e00\u7d44\u7bc4\u570d\u3002\u5176\u4e2d\u5b83\u6709\u4e00\u500b suggest_float \u65b9\u6cd5\uff0c\u8a72\u65b9\u6cd5\u63a1\u7528\u8d85\u53c3\u6578\u7684\u540d\u7a31\u548c\u7bc4\u570d\u4f86\u5c0b\u627e\u5176\u6700\u4f73\u503c\u3002\u6211\u5011\u4ee5 x1 \u4f86\u8209\u4f8b\uff1a x1 = trial.suggest_float(\"x1\", -5, 5) 
\u4e0a\u9762\u9019\u4e00\u6bb5\u7a0b\u5f0f\u5728 GridSearch \u4e2d\u53ef\u4ee5\u8868\u793a\u6210 {\"x1\": np.arange(-5, 5, .1)} \u3002\u5373\u8868\u793a\u641c\u5c0b\u904e\u7a0b\u4e2d\u6211\u5011\u6703\u5f9e x1 \u96a8\u6a5f\u8a2d\u5b9a -5~5 \u4e4b\u9593\u7684\u4efb\u4e00\u6d6e\u9ede\u6578\u3002\u8a2d\u5b9a\u5b8c\u51fd\u5f0f\u5f8c\u5c31\u53ef\u4ee5\u958b\u59cb\u512a\u5316\u4e86\uff0c\u6211\u5011\u5f9e optuna \u5efa\u7acb\u4e00\u500b study \u7269\u4ef6\uff0c\u4e26\u5c07 objective \u51fd\u6578\u50b3\u905e\u7d66 study \u7684 optimize \u65b9\u6cd5\u3002\u7531\u65bc\u6211\u5011\u7684\u76ee\u6a19\u662f\u8981\u627e\u51fa\u51fd\u5f0f\u4e2d\u7684\u6975\u5c0f\u503c\uff0c\u56e0\u6b64 direction \u8a2d\u70ba minimize \u3002\u53e6\u5916\u5728 optimize \u65b9\u6cd5\u4e2d\u6211\u5011\u4e5f\u53ef\u4ee5\u8a2d\u5b9a\u8a66\u9a57\u7684\u6b21\u6578(n_trials)\u6216\u6642\u9593(timeout)\u3002\u4e00\u5207\u5c31\u7dd2\u5f8c\u5373\u53ef\u958b\u59cb\u57f7\u884c\uff01\u4ee5\u4e0b\u7bc4\u4f8b\u662f\u8fed\u4ee350\u6b21\u4e26\u5f9e\u4e2d\u627e\u5230\u4e00\u7d44\u6700\u4f73\u7684 x1 \u8207 x2 \u4f7f\u5176\u76ee\u6a19\u51fd\u5f0f\u53ef\u4ee5\u6700\u5c0f\u5316\u3002\u8dd1\u5b8c 50 \u6b21\u5f8c\u6211\u5011\u53ef\u4ee5\u7d93\u7531 study \u8b8a\u6578\u4e2d\u5f97\u5230\u4e00\u7d44\u6700\u4f73\u7684\u89e3\u3002\u8a66\u9a57\u7d50\u675f\u5f8c\u6211\u5011\u53ef\u4ee5\u767c\u73fe x1 \u8da8\u8fd1\u65bc -2 \u548c x2 \u8da8\u8fd1\u65bc 4\u3002 %% time # Creating Optuna object and defining its parameters study = optuna . create_study ( direction = 'minimize' ) study . optimize ( objective , n_trials = 50 ) # Showing optimization results print ( 'Number of finished trials:' , len ( study . trials )) print ( 'Best trial parameters:' , study . best_trial . params ) print ( 'Best score:' , study . 
best_value ) \u8f38\u51fa\u7d50\u679c\uff1a Number of finished trials: 50 Best trial parameters: {'x1': -1.8154924755761588, 'x2': 3.9141985823539844} Best score: 0.04140490983908035 CPU times: user 432 ms, sys: 46.3 ms, total: 478 ms Wall time: 431 ms \u7531\u4e0a\u8ff0\u7684\u7c21\u55ae\u4f8b\u5b50\u6211\u5011\u53ef\u4ee5\u77e5\u9053\u5efa\u7acb\u4e00\u500b optuna \u6700\u4f73\u5316\u6d41\u7a0b\u50c5\u9700\u8981\u4e09\u6b65\u9a5f\uff1a 1. \u5efa\u7acb objective \u51fd\u5f0f\u8207\u8a2d\u5b9a trial\uff0c\u4e26\u56de\u50b3 loss\u3002 2. \u5efa\u7acb create_study() \u7269\u4ef6\u3002 3. \u4f7f\u7528 optimize() \u57f7\u884c\u641c\u5c0b\u3002","title":"Optuna \u7c21\u55ae\u7bc4\u4f8b"},{"location":"21.Optuna/#end-to-end-example-with-xgboost","text":"\u6211\u5011\u4ee5 Sklearn \u6240\u63d0\u4f9b\u7684\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u593e\u4f86\u505a\u7bc4\u4f8b\u3002\u6b64\u8cc7\u6599\u96c6\u5171\u6709 506 \u7b46\u8cc7\u6599\uff0c\u5176\u4e2d\u8f38\u5165\u7279\u5fb5\u6709 13 \u500b\u5176\u8f38\u51fa\u70ba\u9810\u6e2c\u8a72\u7b46\u8cc7\u6599\u7684\u623f\u50f9\u3002\u7531\u65bc\u60f3\u8981\u5feb\u901f\u793a\u7bc4\u5982\u4f55\u4f7f\u7528 optuna\uff0c\u56e0\u6b64\u9019\u88e1\u5c31\u4e0d\u505a\u4efb\u4f55\u8cc7\u6599 EDA \u8207\u524d\u8655\u7406\u3002 from sklearn.datasets import load_boston X , y = load_boston ( return_X_y = True ) print ( 'X:' , X . shape ) print ( 'y:' , y . 
shape ) \u8f38\u51fa\u7d50\u679c\uff1a X: (506, 13) y: (506,) \u8cc7\u6599\u96c6\u6210\u529f\u88ab\u8f09\u5165\u5f8c\u6211\u5011\u5c31\u53ef\u4ee5\u5efa\u7acb\u4e00\u500b objective \u51fd\u5f0f\u3002\u5728\u9019\u500b\u76ee\u6a19\u51fd\u5f0f\u4e2d\uff0c\u6211\u5011\u5efa\u7acb\u4e86\u4e00\u500b\u5c0f\u7bc4\u570d\u7684\u7684 XGBoost \u8d85\u53c3\u6578\u641c\u7d22\u7a7a\u9593\u3002\u5176\u6bcf\u4e00\u500b\u8d85\u53c3\u6578\u90fd\u6703\u6709\u4e00\u500b\u641c\u7d22\u7684\u7bc4\u570d\uff0c\u53ef\u4ee5\u4f7f\u7528 suggest_* \u65b9\u6cd5\u8a2d\u5b9a\u5340\u9593\u3002\u6b64\u65b9\u6cd5\u5fc5\u9808\u8f38\u5165\u8d85\u53c3\u6578\u7684\u540d\u7a31\uff0c\u4ee5\u53ca\u7d66\u4e88\u8a72\u53c3\u6578\u7684\u4e00\u7d44\u96a8\u6a5f\u7bc4\u570d\u5176\u578b\u614b\u6709\u5f88\u591a\u4f8b\u5982\uff1a suggest_int \u3001 suggest_discrete_uniform \u3001 suggest_float ...\u7b49\u3002\u66f4\u591a\u8a73\u7d30\u7684\u5167\u5bb9\u53ef\u4ee5\u5f9e \u5b98\u65b9\u6587\u4ef6 \u53d6\u5f97\u3002\u6216\u662f\u4e5f\u53ef\u4ee5\u53c3\u8003\u5b98\u65b9\u5728 GitHub \u4e0a\u5c0d\u65bc XGBoost \u7684\u4f7f\u7528\u7bc4\u4f8b\u3002 import optuna import xgboost as xgb from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split def objective ( trial , X = X , y = y ): \"\"\" A function to train a model using different hyperparamerters combinations provided by Optuna. \"\"\" X_train , X_valid , y_train , y_valid = train_test_split ( X , y , test_size = 0.4 ) params = { 'max_depth' : trial . suggest_int ( 'max_depth' , 6 , 15 ), \"subsample\" : trial . suggest_float ( \"subsample\" , 0.2 , 1.0 ), 'n_estimators' : trial . suggest_int ( 'n_estimators' , 500 , 2000 , 100 ), 'eta' : trial . suggest_float ( \"eta\" , 1e-8 , 1.0 , log = True ), 'alpha' : trial . suggest_float ( 'alpha' , 1e-8 , 1.0 , log = True ), 'lambda' : trial . suggest_float ( 'lambda' , 1e-8 , 1.0 , log = True ), 'gamma' : trial . 
suggest_float ( \"gamma\" , 1e-8 , 1.0 , log = True ), 'min_child_weight' : trial . suggest_int ( 'min_child_weight' , 2 , 10 ), 'grow_policy' : trial . suggest_categorical ( \"grow_policy\" , [ \"depthwise\" , \"lossguide\" ]), \"colsample_bytree\" : trial . suggest_float ( \"colsample_bytree\" , 0.2 , 1.0 ) } reg = xgb . XGBRegressor ( ** params ) reg . fit ( X_train , y_train , eval_set = [( X_valid , y_valid )], eval_metric = 'rmse' , verbose = False ) return mean_squared_error ( y_valid , reg . predict ( X_valid ), squared = False ) \u8a2d\u5b9a\u597d\u8abf\u53c3\u7684\u5340\u9593\u5f8c\uff0c\u5373\u53ef\u958b\u59cb\u56c9\u3002 %% time # Creating Optuna object and defining its parameters study = optuna . create_study ( direction = 'minimize' ) study . optimize ( objective , n_trials = 10 ) # Showing optimization results print ( 'Number of finished trials:' , len ( study . trials )) print ( 'Best trial parameters:' , study . best_trial . params ) print ( 'Best score:' , study . best_value ) Optuna \u9810\u8a2d\u7684\u8d85\u53c3\u6578\u641c\u5c0b\u65b9\u6cd5\u80fd\u6709\u6548\u5730\u5728\u77ed\u6642\u9593\u5167\u5f80\u6700\u4f73\u7684\u65b9\u5411\u53bb\u5c0b\u627e\u4e00\u7d44\u9069\u5408\u7684\u53c3\u6578\u3002\u8207 GridSearch \u76f8\u6bd4\u539f\u672c\u53ef\u80fd\u9700\u8981\u6578\u5c0f\u6642\u7684\u641c\u7d22\u7a7a\u9593\u5728\u77ed\u77ed\u7684\u5e7e\u5206\u9418\u5167\u5c31\u53ef\u4ee5\u7372\u5f97\u4e0d\u932f\u7684\u7d93\u679c\u3002\u4e26\u4e14\u6709\u6548\u7684\u964d\u4f4e loss\u3002\u9664\u4e86\u8ff4\u6b78\u554f\u984c Optuna \u4e5f\u80fd\u5c0d\u5206\u985e\u554f\u984c\u9032\u884c\u8d85\u53c3\u6578\u641c\u5c0b\uff0c\u5b98\u65b9\u7684 GitHub \u4e5f\u6709\u63d0\u4f9b\u5404\u7a2e\u4e0d\u540c\u6a5f\u5668\u5b78\u7fd2\u6846\u67b6\u7684\u5beb\u6cd5\u3002","title":"End-to-end example with XGBoost"},{"location":"21.Optuna/#optuna_2","text":"TPESampler 
\u70ba\u9810\u8a2d\u7684\u8d85\u53c3\u6578\u63a1\u6a23\u5668\u3002\u5b83\u8a66\u5716\u900f\u904e\u63d0\u9ad8\u6700\u5f8c\u4e00\u6b21\u8a66\u9a57\u7684\u5206\u6578\u4f86\u5c0d\u8d85\u53c3\u6578\u5019\u9078\u8005\u9032\u884c\u63a1\u6a23\u3002\u9664\u6b64\u4e4b\u5916 Optuna \u63d0\u4f9b\u4e86\u4ee5\u4e0b\u9019\u5e7e\u500b\u53c3\u6578\u63a1\u6a23\u7684\u65b9\u5f0f: - GridSampler : \u8207 Sklearn \u7684 GridSearch \u63a1\u6a23\u65b9\u5f0f\u76f8\u540c\u3002\u4f7f\u7528\u6b64\u65b9\u6cd5\u6642\u5efa\u8b70\u4e0d\u8981\u8a2d\u5b9a\u592a\u5927\u7684\u7bc4\u570d\u3002 - RandomSampler : \u8207 Sklearn \u7684 RandomizedGridSearch \u63a1\u6a23\u65b9\u5f0f\u76f8\u540c\u3002 - TPESampler : \u5168\u540d Tree-structured Parzen Estimator sampler\u3002\u9810\u8a2d\u63a1\u6a23\u65b9\u5f0f\u3002 - CmaEsSampler : \u57fa\u65bc CMA ES \u6f14\u7b97\u7b97\u6cd5\u7684\u63a1\u6a23\u5668 (\u4e0d\u652f\u63f4\u985e\u5225\u578b\u7684\u8d85\u53c3\u6578). \u5982\u679c\u9700\u8981\u66ff\u63db\u63a1\u6a23\u53c3\u6578\u7684\u65b9\u5f0f\u53ef\u4ee5\u53c3\u8003\u4ee5\u4e0b\u7a0b\u5f0f\u3002 from optuna.samplers import CmaEsSampler , RandomSampler # Study with a random sampler study_1 = optuna . create_study ( sampler = RandomSampler ( seed = 42 )) # Study with a CMA ES sampler study_2 = optuna . 
create_study ( sampler = CmaEsSampler ( seed = 42 ))","title":"Optuna \u5982\u4f55\u63a1\u6a23\u53c3\u6578\uff1f"},{"location":"21.Optuna/#optuna_3","text":"Optuna \u5728\u540c\u6642\u4e5f\u63d0\u4f9b\u4e86\u8996\u89ba\u5316\u7684\u5957\u4ef6: - plot_optimization_history (\u8996\u89ba\u5316\u512a\u5316\u7684\u904e\u7a0b) - plot_intermediate_values (\u8996\u89ba\u5316\u5b78\u7fd2\u7684\u66f2\u7dda) - plot_parallel_coordinate (\u8996\u89ba\u5316\u9ad8\u7dad\u5ea6\u4e2d\u53c3\u6578\u9593\u7684\u5f7c\u6b64\u95dc\u4fc2) - plot_contour (\u8996\u89ba\u5316\u53c3\u6578\u9593\u7684\u5f7c\u6b64\u95dc\u4fc2) - plot_slice (\u8996\u89ba\u5316\u500b\u5225\u53c3\u6578) - plot_param_importances (\u53c3\u6578\u5c0d\u6a21\u578b\u7684\u91cd\u8981\u7a0b\u5ea6) - plot_edf (\u8996\u89ba\u5316\u9a57\u5206\u4f48\u51fd\u6578) \u5ef6\u7e8c\u4e0a\u9762\u7684\u7bc4\u4f8b\u6211\u5011\u4f86\u8996\u89ba\u5316\u5c55\u793a Optuna \u641c\u5c0b\u7684\u904e\u7a0b\u8207\u7d50\u679c\u3002\u9996\u5148\u6211\u5011\u4f86\u7e6a\u88fd study \u7684\u512a\u5316\u6b77\u53f2\u904e\u7a0b\u3002\u9019\u5f35\u5716\u544a\u8a34\u6211\u5011\uff0cOptuna \u53ea\u7d93\u904e\u5e7e\u6b21\u8a66\u9a57\u5c31\u4f7f\u5206\u6578\u6536\u6582\u5230\u6700\u5c0f\u503c\u3002 from optuna.visualization import plot_optimization_history plotly_config = { \"staticPlot\" : True } fig = plot_optimization_history ( study ) fig . 
show ( config = plotly_config ) \u63a5\u4e0b\u4f86\uff0c\u8b93\u6211\u5011\u7e6a\u88fd\u8d85\u53c3\u6578\u91cd\u8981\u6027\u3002\u5f9e\u9019\u5f35\u5716\u6211\u5011\u53ef\u4ee5\u767c\u73fe eta(learning_rate) \u5b78\u7fd2\u901f\u7387\u662f\u6700\u70ba\u91cd\u8981\u7684\u3002\u6b64\u5916 grow_policy \u8207 lambda \u5c0d\u6e1b\u5c11 loss \u4e0a\u7121\u592a\u5927\u5e6b\u52a9\u3002\u56e0\u6b64\u5728\u4e0b\u4e00\u6b21\u57f7\u884c\u8a66\u9a57\u7684\u6642\u5019\u53ef\u4ee5\u8003\u616e\u5c07\u7121\u7528\u7684\u53c3\u6578\u79fb\u9664\uff0c\u4e26\u5c07\u91cd\u8981\u7684\u8d85\u53c3\u6578\u7bc4\u570d\u52a0\u5927\u53d6\u5f97\u66f4\u597d\u7684\u641c\u7d22\u7d50\u679c\u3002\u5176\u4ed6\u7684\u4f7f\u7528\u65b9\u6cd5\u53ef\u4ee5 \u53c3\u8003 \u5b98\u65b9\u7684\u8aaa\u660e\u6587\u4ef6\u3002 from optuna.visualization import plot_param_importances fig = plot_param_importances ( study ) fig . show ( config = plotly_config )","title":"Optuna \u8996\u89ba\u5316\u5206\u6790"},{"location":"21.Optuna/#_3","text":"\u4eca\u5929\u6211\u5011\u4ecb\u7d39\u4e86\u9019\u4e00\u500b\u8d85\u53c3\u6578\u6700\u4f73\u5316\u7684\u5de5\u5177\uff0c\u88e1\u9762\u6709\u592a\u591a\u529f\u80fd\u5c1a\u672a\u63d0\u5230\u3002\u4f8b\u5982\uff1a\u8a66\u9a57\u7684\u526a\u679d\uff0c\u7c21\u55ae\u4f86\u8aaa\u5c31\u662f\u8a2d\u5b9a\u8a66\u9a57\u7684\u4f8b\u5916\u689d\u4ef6\u7576\u4e0d\u6eff\u8db3\u9810\u5b9a\u689d\u4ef6\u5373\u4e0d\u57f7\u884c\u6b64\u6b21\u8a66\u9a57\u3002\u6216\u662f\u5132\u5b58\u6b77\u53f2\u6700\u4f73\u7684\u53c3\u6578\u5be6\u73fe\u5e73\u884c\u512a\u5316\u5de5\u4f5c\u3002\u9664\u6b64\u4e4b\u5916\u6b64\u5957\u4ef6\u9084\u652f\u63f4\u50cf\u662f SQLite 
\u7b49\u8cc7\u6599\u5eab\u53ef\u4ee5\u5132\u5b58\u6b77\u53f2\u641c\u5c0b\u7d50\u679c\u5feb\u901f\u7684\u9054\u5230\u6700\u4f73\u641c\u5c0b\u80fd\u529b\u3002\u91cd\u9ede\u6b64\u5957\u4ef6\u9084\u652f\u63f4\u795e\u7d93\u7db2\u8def\u7684\u53c3\u6578\u641c\u5c0b\u4ee5\u53ca\u7db2\u8def\u7684\u5bec\u5ea6\u6df1\u5ea6\u9078\u64c7\u3002\u5e38\u898b\u7684\u6df1\u5ea6\u5b78\u7fd2\u6846\u67b6\u90fd\u80fd\u652f\u63f4\u4f8b\u5982 TensorFlow\u3001PyTorch\uff0cMXNet...\u7b49\u3002","title":"\u5c0f\u7d50"},{"location":"21.Optuna/#reference","text":"OPTUNA: A Flexible, Efficient and Scalable Hyperparameter Optimization Framework optuna.org \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"22.Plotly-Express/","text":"[Day 22] Python \u8996\u89ba\u5316\u89e3\u91cb\u6578\u64da - Plotly Express \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u5b89\u88dd plotly \u624b\u628a\u624b\u5be6\u4f5c\u8996\u89ba\u5316\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6 \u76f4\u65b9\u5716 \u7279\u5fb5\u95dc\u806f\u5ea6\u5206\u6790 \u6563\u4f48\u5716 \u7bb1\u5f62\u5716 \u8907\u5408\u578b\u8996\u89ba\u5316\u6280\u5de7 \u532f\u51fa\u5716\u7247 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 Plotly Express \u662f\u4e00\u500b\u9ad8\u7cbe\u7dfb\u7684\u8cc7\u6599\u8996\u89ba\u5316\u5957\u4ef6\u3002\u521d\u5b78\u6a5f\u5668\u5b78\u7fd2\u7684\u4f60\u4e00\u5b9a\u78b0\u904e\u50cf\u662f matplotlib \u548c seaborn \u9019\u985e\u578b\u7684\u5716\u8868\u5316\u5957\u4ef6\uff0c\u4e0d\u904e\u4f7f\u7528\u904e Plotly Express \u6703\u8b93\u4f60\u5c0d\u65bc\u8cc7\u6599\u8996\u89ba\u5316\u6709\u66f4\u4e0d\u4e00\u6a23\u7684\u9ad4\u9a57\u3002\u5b83\u7684\u529f\u80fd\u4f7f\u7528\u8d77\u4f86\u975e\u5e38\u76f4\u89c0\uff0c\u4e26\u4e14\u53ef\u4ee5\u5f88\u597d\u5730\u8207 Pandas DataFrame \u914d\u5408\u4f7f\u7528\u3002 Plotly Express \u65bc 2019 \u5e74\u7531\u52a0\u62ff\u5927 Plotly 
\u9019\u9593\u516c\u53f8\u91cb\u51fa\u4e86\u7b2c\u4e00\u7248\u9ad8\u968e\u7684 Python \u8cc7\u6599\u8996\u89ba\u5316\u5957\u4ef6\u3002 \u5b89\u88dd plotly \u82e5\u5c1a\u672a\u5b89\u88dd\u6b64\u5957\u4ef6\u7684\u8b80\u8005\uff0c\u53ef\u4ee5\u958b\u555f\u7d42\u7aef\u6a5f\u8f38\u5165\u4ee5\u4e0b\u6307\u4ee4\u9032\u884c\u5b89\u88dd\uff1a pip install plotly 1) \u8f09\u5165\u8cc7\u6599\u96c6 \u5728\u4eca\u5929\u7684\u7bc4\u4f8b\u4e2d\u6211\u5011\u4e00\u6a23\u63a1\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u4f86\u505a\u793a\u7bc4\uff0c\u8b93\u5927\u5bb6\u77a7\u77a7 Plotly Express \u662f\u5982\u512a\u96c5\u7684\u8655\u7406\u8cc7\u6599\u8996\u89ba\u5316\u3002 import plotly.express as px import plotly.graph_objects as go from IPython.display import HTML df_data = px . data . iris () df_data \u76f4\u65b9\u5716 \u70ba\u4e86\u66f4\u6e05\u695a\u4e86\u89e3\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u6cc1\uff0c\u6211\u5011\u53ef\u4ee5\u63a1\u7528\u76f4\u65b9\u5716 histogram \u505a\u66f4\u8fd1\u4e00\u6b65\u7684\u5206\u6790\u3002\u5f9e\u76f4\u65b9\u5716\u6211\u5011\u53ef\u4ee5\u66f4\u6e05\u695a\u77e5\u9053\u7279\u5fb5\u7684\u6bcf\u500b\u503c\u7684\u983b\u7387\u5206\u4f48\u3002\u7531\u65bc\u76ee\u524d\u7248\u672c\u5728 Notebook \u7121\u6cd5\u76f4\u63a5\u4f7f\u7528 fig.show() \u986f\u793a\u4e92\u52d5\u5716\uff0c\u5fc5\u9808\u5b89\u88dd\u4e00\u4e9b\u5c0f\u63d2\u4ef6\u6a21\u7d44\u8207\u8a2d\u5b9a\u3002\u56e0\u6b64\u7bc4\u4f8b\u4e2d\u63a1\u7528\u6700\u7c21\u55ae\u65b9\u6cd5\uff0c\u5148\u8f49\u63db\u6210 HTML code \u4e26\u900f\u904e IPython.display \u4e2d\u7684 HTML \u65b9\u6cd5\u986f\u793a\u51fa\u4f86\u3002 fig = px . histogram ( df_data , x = \"sepal_width\" ) HTML ( fig . 
to_html ()) \u9664\u6b64\u4e4b\u5916\u6211\u5011\u4e5f\u80fd\u89c0\u5bdf\u6bcf\u4e00\u500b\u7368\u7acb\u7279\u5fb5\u5c0d\u65bc\u82b1\u7684\u54c1\u7a2e\u7684\u6bcf\u500b\u5206\u5e03\u72c0\u6cc1\u3002\u6bcf\u500b\u4e0d\u540c\u7684\u984f\u8272\u4ee3\u8868\u4e0d\u540c\u7684\u82b1\u6735\u54c1\u7a2e\uff0c\u6211\u5011\u53ef\u4ee5\u85c9\u7531\u53c3\u6578\u8a2d\u5b9a\u6bcf\u500b\u76f4\u65b9\u5716\u662f\u5426\u91cd\u758a\uff0c\u4ee5\u53ca\u91cd\u758a\u7684\u900f\u660e\u7a0b\u5ea6\u3002 fig = px . histogram ( df_data , x = \"sepal_width\" , color = \"species\" ) fig . update_layout ( barmode = 'overlay' ) fig . update_traces ( opacity = 0.75 ) HTML ( fig . to_html ()) \u63a5\u4e0b\u4f86\u4e00\u6a23\u900f\u904e\u76f4\u65b9\u5716\u65b9\u5f0f\u4f86\u89c0\u5bdf\u6bcf\u500b\u82b1\u6735\u54c1\u7a2e\u7684\u6578\u91cf\u3002\u5f9e\u8996\u89ba\u5316\u53ef\u4ee5\u5f88\u6e05\u695a\u5f97\u77e5\u8a72\u8cc7\u6599\u96c6\u662f\u5426\u662f\u4e00\u500b\u5e73\u7a69\u7684\u8cc7\u6599\u96c6\u3002 fig = px . histogram ( df_data , x = 'species' , y = 'sepal_width' , histfunc = 'count' , height = 300 , title = 'Histogram Chart' ) HTML ( fig . to_html ()) \u7279\u5fb5\u95dc\u806f\u5ea6\u5206\u6790 \u6211\u5011\u53ef\u4ee5\u63a1\u7528 scatter_matrix \u70ba\u6bcf\u4e00\u500b\u7279\u5fb5\u5f7c\u6b64\u9593\u505a\u4e00\u500b\u95dc\u806f\u5ea6\u5206\u6790\u3002\u900f\u904e\u9019\u7a2e\u8996\u89ba\u5316\u65b9\u5f0f\u6211\u5011\u53ef\u4ee5\u5f88\u6e05\u695a\u7684\u77e5\u9053\u5169\u500b\u7279\u5fb5\u9593\u662f\u5426\u6b63\u76f8\u95dc\u8207\u8ca0\u76f8\u95dc\u3002 fig = px . scatter_matrix ( df_data , dimensions = [ \"sepal_width\" , \"sepal_length\" , \"petal_width\" , \"petal_length\" ], color = \"species\" ) HTML ( fig . 
to_html ()) \u6563\u4f48\u5716 \u6211\u5011\u4f7f\u7528\u6563\u4f48\u5716\u5c07\u82b1\u843c\u7684\u9577\u5ea6\u8207\u5bec\u5ea6\u986f\u793a\u5728\u4e8c\u7dad\u5750\u6a19\u5e73\u9762\u4e0a\u3002\u4f7f\u7528 Plotly Express \u5957\u4ef6\u4e2d\u7684 scatter \u65b9\u6cd5\uff0c\u6211\u5011\u53ef\u4ee5\u8f15\u9b06\u69cb\u5efa\u5716\u5f62\uff0c\u4e26\u653e\u5165 DataFrame \u683c\u5f0f\u7684\u8cc7\u6599\u4e26\u6307\u5b9a\u5fc5\u8981\u53c3\u6578 x \u8ef8\u4e2d\u7684\u8b8a\u6578\u548c y \u8ef8\u4e2d\u7684\u8b8a\u6578\u3002 fig = px . scatter ( df_data , x = \"sepal_width\" , y = \"sepal_length\" ) HTML ( fig . to_html ()) \u5982\u679c\u60f3\u8981\u66f4\u6e05\u695a\u8868\u9054\u6bcf\u500b\u8cc7\u6599\u9ede\u6240\u5c0d\u61c9\u7684\u985e\u5225\uff0c\u53ef\u4ee5\u518d\u52a0\u4e0a color \u4e26\u6307\u5b9a\u7a2e\u985e\u7684\u6b04\u4f4d\u5373\u6703\u5c07\u6240\u6709\u8cc7\u6599\u81ea\u52d5\u5206\u6210\u4e09\u985e\u3002\u6b64\u5916\u6211\u5011\u4e5f\u80fd\u5920\u8a2d\u5b9a\u6ed1\u9f20\u79fb\u5230\u8cc7\u6599\u9ede\u4e0a\u6240\u986f\u793a\u7684\u8cc7\u8a0a\uff0c\u900f\u904e hover_data \u4e26\u7d66\u4e88\u6307\u5b9a\u6b04\u4f4d\u5373\u53ef\u770b\u5230\u8f38\u51fa\u3002 fig = px . scatter ( df_data , x = \"sepal_width\" , y = \"sepal_length\" , color = \"species\" , hover_data = [ 'petal_length' , 'petal_width' ]) HTML ( fig . 
to_html ()) Plotly Express \u4e5f\u63d0\u4f9b\u4e09\u7dad\u7684\u8996\u89ba\u5316\uff0c\u6b64\u5916\u4f7f\u7528\u8005\u4e5f\u80fd\u5920\u904e\u63a7\u5236\u8b8a\u7248\u81ea\u7531\u7684\u653e\u5927\u8207\u7e2e\u5c0f\u751a\u81f3\u65cb\u8f49\u3002\u4e0b\u5716\u7bc4\u4f8b\u4e2d\u6211\u5011\u5c07 x \u8ef8\u8a2d\u5b9a\u82b1\u843c\u5bec\u5ea6\uff0cy \u8ef8\u8a2d\u5b9a\u82b1\u843c\u9577\u5ea6\uff0cz \u8ef8\u8a2d\u5b9a\u82b1\u74e3\u5bec\u5ea6\u3002\u6b64\u5916 size \u53ef\u4ee5\u63a7\u5236\u6bcf\u4e00\u500b\u8cc7\u6599\u9ede\u7684\u5927\u5c0f\uff0c\u9019\u88e1\u63a1\u7528\u82b1\u74e3\u7684\u9577\u5ea6\u505a\u70ba\u6bcf\u500b\u8cc7\u6599\u9ede\u5927\u5c0f\u7684\u4f9d\u64da\u3002\u56e0\u6b64\u5f9e\u9019\u500b\u7acb\u9ad4\u7a7a\u9593\u53ef\u4ee5\u767c\u73fe\u5f9e\u82b1\u74e3\u9577\u5ea6\u5c0d\u65bc\u82b1\u7684\u7a2e\u985e\u6709\u5f88\u5f37\u7684\u95dc\u806f\u6027\u3002 fig = px . scatter_3d ( df_data , x = \"sepal_width\" , y = \"sepal_length\" , z = \"petal_width\" , color = \"species\" , size = 'petal_length' ) HTML ( fig . to_html ()) \u7bb1\u5f62\u5716 \u6211\u5011\u53ef\u4ee5\u900f\u904e\u7bb1\u5f62\u5716\u9032\u884c\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u6cc1\u5206\u6790\u3002\u5f9e\u8996\u89ba\u5316\u5716\u4e2d\u53ef\u4ee5\u6e05\u695a\u5730\u77e5\u9053\u82b1\u843c\u7684\u5bec\u5ea6\u7bc4\u570d\u4ecb\u65bc 2~4.5 \u4e4b\u9593\uff0c\u4ee5\u53ca\u56db\u5206\u4f4d\u6578\u548c\u96e2\u7fa4\u503c\u7684\u8a0a\u606f\u3002 fig = px . box ( df_data , y = \"sepal_width\" ) HTML ( fig . to_html ()) \u9664\u6b64\u4e4b\u5916\u6211\u5011\u80fd\u5920\u66f4\u8fd1\u4e00\u6b65\u7684\u5206\u6790\u82b1\u843c\u5bec\u5ea6\u5c0d\u65bc\u6bcf\u500b\u54c1\u7a2e\u7684\u5206\u5e03\u72c0\u6cc1\u3002 fig = px . box ( df_data , x = \"species\" , y = \"sepal_width\" , color = \"species\" ) HTML ( fig . 
to_html ()) \u8907\u5408\u578b\u8996\u89ba\u5316\u6280\u5de7 \u8907\u5408\u578b\u8996\u89ba\u5316\u65b9\u5f0f\u53ef\u4ee5\u540c\u6642\u9810\u89bd\u5169\u500b\u8b8a\u6578\u9593\u7684\u6563\u4f48\u5716\u8207\u7bb1\u578b\u5716\u95dc\u4fc2\u3002\u4e3b\u8981\u662f\u900f\u904e marginal_y \u8207 marginal_x \u8a2d\u7f6e\u6a6b\u8ef8\u8207\u7e31\u8ef8\u7684\u8996\u89ba\u5316\u65b9\u5f0f\uff0c\u56e0\u6b64\u5728\u4e00\u500b\u5716\u8868\u4e2d\u53ef\u4ee5\u7d50\u5408\u5169\u7a2e\u8996\u89ba\u5316\u3002\u6b64\u5916 trendline \u53ef\u4ee5\u70ba\u6563\u4f48\u5716\u7e6a\u88fd\u8da8\u52e2\u7dda\uff0c\u8a2d\u7f6e ols \u6703\u63a1\u7528\u6700\u5c0f\u5e73\u65b9\u6cd5\u4f4d\u6578\u64da\u5efa\u7acb\u4e00\u500b\u7dda\u6027\u8ff4\u6b78\u3002 fig = px . scatter ( df_data , x = \"sepal_width\" , y = \"sepal_length\" , color = \"species\" , marginal_y = \"box\" , marginal_x = \"box\" , trendline = \"ols\" , template = \"simple_white\" ) HTML ( fig . to_html ()) \u4e5f\u53ef\u4ee5\u8a66\u8457\u5c07\u6a6b\u8ef8\u8207\u7e31\u8ef8\u6539\u6210\u76f4\u65b9\u5716\u3002 fig = px . scatter ( df_data , x = \"sepal_width\" , y = \"sepal_length\" , color = \"species\" , marginal_y = \"histogram\" , marginal_x = \"histogram\" , trendline = \"ols\" , template = \"simple_white\" ) HTML ( fig . to_html ()) \u532f\u51fa\u5716\u7247 \u65b9\u6cd5\u4e00 \u76f4\u63a5\u9ede\u9078\u63a7\u5236\u9762\u677f\u7684\u76f8\u6a5f\u5716\u793a (Download plot as a png) \u53ef\u4ee5\u7acb\u5373\u4e0b\u8f09\u5716\u7247\u3002 \u65b9\u6cd5\u4e8c \u9996\u5148\u8981\u5b89\u88dd kaleido \u624d\u80fd\u532f\u51fa Plotly Express \u7684\u975c\u614b\u5716\u7247\u3002 !pip install kaleido \u532f\u51fa\u975c\u614b\u5716\u7247 fig . write_image ( \"./demo.png\" ) \u532f\u51fa\u7db2\u9801\u683c\u5f0f\uff0c\u4fdd\u7559\u4e92\u52d5\u5f62\u5f0f fig . 
write_html ( \"./demo.html\" ) Reference Plotly Express API Doc Plotly Express GitHub \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 22] Python \u8996\u89ba\u5316\u89e3\u91cb\u6578\u64da - Plotly Express"},{"location":"22.Plotly-Express/#day-22-python-plotly-express","text":"","title":"[Day 22] Python \u8996\u89ba\u5316\u89e3\u91cb\u6578\u64da - Plotly Express"},{"location":"22.Plotly-Express/#_1","text":"\u5b89\u88dd plotly \u624b\u628a\u624b\u5be6\u4f5c\u8996\u89ba\u5316\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6 \u76f4\u65b9\u5716 \u7279\u5fb5\u95dc\u806f\u5ea6\u5206\u6790 \u6563\u4f48\u5716 \u7bb1\u5f62\u5716 \u8907\u5408\u578b\u8996\u89ba\u5316\u6280\u5de7 \u532f\u51fa\u5716\u7247 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"22.Plotly-Express/#_2","text":"Plotly Express \u662f\u4e00\u500b\u9ad8\u7cbe\u7dfb\u7684\u8cc7\u6599\u8996\u89ba\u5316\u5957\u4ef6\u3002\u521d\u5b78\u6a5f\u5668\u5b78\u7fd2\u7684\u4f60\u4e00\u5b9a\u78b0\u904e\u50cf\u662f matplotlib \u548c seaborn \u9019\u985e\u578b\u7684\u5716\u8868\u5316\u5957\u4ef6\uff0c\u4e0d\u904e\u4f7f\u7528\u904e Plotly Express \u6703\u8b93\u4f60\u5c0d\u65bc\u8cc7\u6599\u8996\u89ba\u5316\u6709\u66f4\u4e0d\u4e00\u6a23\u7684\u9ad4\u9a57\u3002\u5b83\u7684\u529f\u80fd\u4f7f\u7528\u8d77\u4f86\u975e\u5e38\u76f4\u89c0\uff0c\u4e26\u4e14\u53ef\u4ee5\u5f88\u597d\u5730\u8207 Pandas DataFrame \u914d\u5408\u4f7f\u7528\u3002 Plotly Express \u65bc 2019 \u5e74\u7531\u52a0\u62ff\u5927 Plotly \u9019\u9593\u516c\u53f8\u91cb\u51fa\u4e86\u7b2c\u4e00\u7248\u9ad8\u968e\u7684 Python \u8cc7\u6599\u8996\u89ba\u5316\u5957\u4ef6\u3002","title":"\u524d\u8a00"},{"location":"22.Plotly-Express/#plotly","text":"\u82e5\u5c1a\u672a\u5b89\u88dd\u6b64\u5957\u4ef6\u7684\u8b80\u8005\uff0c\u53ef\u4ee5\u958b\u555f\u7d42\u7aef\u6a5f\u8f38\u5165\u4ee5\u4e0b\u6307\u4ee4\u9032\u884c\u5b89\u88dd\uff1a pip install 
plotly","title":"\u5b89\u88dd plotly"},{"location":"22.Plotly-Express/#1","text":"\u5728\u4eca\u5929\u7684\u7bc4\u4f8b\u4e2d\u6211\u5011\u4e00\u6a23\u63a1\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u4f86\u505a\u793a\u7bc4\uff0c\u8b93\u5927\u5bb6\u77a7\u77a7 Plotly Express \u662f\u5982\u512a\u96c5\u7684\u8655\u7406\u8cc7\u6599\u8996\u89ba\u5316\u3002 import plotly.express as px import plotly.graph_objects as go from IPython.display import HTML df_data = px . data . iris () df_data","title":"1) \u8f09\u5165\u8cc7\u6599\u96c6"},{"location":"22.Plotly-Express/#_3","text":"\u70ba\u4e86\u66f4\u6e05\u695a\u4e86\u89e3\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u6cc1\uff0c\u6211\u5011\u53ef\u4ee5\u63a1\u7528\u76f4\u65b9\u5716 histogram \u505a\u66f4\u8fd1\u4e00\u6b65\u7684\u5206\u6790\u3002\u5f9e\u76f4\u65b9\u5716\u6211\u5011\u53ef\u4ee5\u66f4\u6e05\u695a\u77e5\u9053\u7279\u5fb5\u7684\u6bcf\u500b\u503c\u7684\u983b\u7387\u5206\u4f48\u3002\u7531\u65bc\u76ee\u524d\u7248\u672c\u5728 Notebook \u7121\u6cd5\u76f4\u63a5\u4f7f\u7528 fig.show() \u986f\u793a\u4e92\u52d5\u5716\uff0c\u5fc5\u9808\u5b89\u88dd\u4e00\u4e9b\u5c0f\u63d2\u4ef6\u6a21\u7d44\u8207\u8a2d\u5b9a\u3002\u56e0\u6b64\u7bc4\u4f8b\u4e2d\u63a1\u7528\u6700\u7c21\u55ae\u65b9\u6cd5\uff0c\u5148\u8f49\u63db\u6210 HTML code \u4e26\u900f\u904e IPython.display \u4e2d\u7684 HTML \u65b9\u6cd5\u986f\u793a\u51fa\u4f86\u3002 fig = px . histogram ( df_data , x = \"sepal_width\" ) HTML ( fig . to_html ()) \u9664\u6b64\u4e4b\u5916\u6211\u5011\u4e5f\u80fd\u89c0\u5bdf\u6bcf\u4e00\u500b\u7368\u7acb\u7279\u5fb5\u5c0d\u65bc\u82b1\u7684\u54c1\u7a2e\u7684\u6bcf\u500b\u5206\u5e03\u72c0\u6cc1\u3002\u6bcf\u500b\u4e0d\u540c\u7684\u984f\u8272\u4ee3\u8868\u4e0d\u540c\u7684\u82b1\u6735\u54c1\u7a2e\uff0c\u6211\u5011\u53ef\u4ee5\u85c9\u7531\u53c3\u6578\u8a2d\u5b9a\u6bcf\u500b\u76f4\u65b9\u5716\u662f\u5426\u91cd\u758a\uff0c\u4ee5\u53ca\u91cd\u758a\u7684\u900f\u660e\u7a0b\u5ea6\u3002 fig = px . 
histogram ( df_data , x = \"sepal_width\" , color = \"species\" ) fig . update_layout ( barmode = 'overlay' ) fig . update_traces ( opacity = 0.75 ) HTML ( fig . to_html ()) \u63a5\u4e0b\u4f86\u4e00\u6a23\u900f\u904e\u76f4\u65b9\u5716\u65b9\u5f0f\u4f86\u89c0\u5bdf\u6bcf\u500b\u82b1\u6735\u54c1\u7a2e\u7684\u6578\u91cf\u3002\u5f9e\u8996\u89ba\u5316\u53ef\u4ee5\u5f88\u6e05\u695a\u5f97\u77e5\u8a72\u8cc7\u6599\u96c6\u662f\u5426\u662f\u4e00\u500b\u5e73\u7a69\u7684\u8cc7\u6599\u96c6\u3002 fig = px . histogram ( df_data , x = 'species' , y = 'sepal_width' , histfunc = 'count' , height = 300 , title = 'Histogram Chart' ) HTML ( fig . to_html ())","title":"\u76f4\u65b9\u5716"},{"location":"22.Plotly-Express/#_4","text":"\u6211\u5011\u53ef\u4ee5\u63a1\u7528 scatter_matrix \u70ba\u6bcf\u4e00\u500b\u7279\u5fb5\u5f7c\u6b64\u9593\u505a\u4e00\u500b\u95dc\u806f\u5ea6\u5206\u6790\u3002\u900f\u904e\u9019\u7a2e\u8996\u89ba\u5316\u65b9\u5f0f\u6211\u5011\u53ef\u4ee5\u5f88\u6e05\u695a\u7684\u77e5\u9053\u5169\u500b\u7279\u5fb5\u9593\u662f\u5426\u6b63\u76f8\u95dc\u8207\u8ca0\u76f8\u95dc\u3002 fig = px . scatter_matrix ( df_data , dimensions = [ \"sepal_width\" , \"sepal_length\" , \"petal_width\" , \"petal_length\" ], color = \"species\" ) HTML ( fig . to_html ())","title":"\u7279\u5fb5\u95dc\u806f\u5ea6\u5206\u6790"},{"location":"22.Plotly-Express/#_5","text":"\u6211\u5011\u4f7f\u7528\u6563\u4f48\u5716\u5c07\u82b1\u843c\u7684\u9577\u5ea6\u8207\u5bec\u5ea6\u986f\u793a\u5728\u4e8c\u7dad\u5750\u6a19\u5e73\u9762\u4e0a\u3002\u4f7f\u7528 Plotly Express \u5957\u4ef6\u4e2d\u7684 scatter \u65b9\u6cd5\uff0c\u6211\u5011\u53ef\u4ee5\u8f15\u9b06\u69cb\u5efa\u5716\u5f62\uff0c\u4e26\u653e\u5165 DataFrame \u683c\u5f0f\u7684\u8cc7\u6599\u4e26\u6307\u5b9a\u5fc5\u8981\u53c3\u6578 x \u8ef8\u4e2d\u7684\u8b8a\u6578\u548c y \u8ef8\u4e2d\u7684\u8b8a\u6578\u3002 fig = px . scatter ( df_data , x = \"sepal_width\" , y = \"sepal_length\" ) HTML ( fig . 
to_html ()) \u5982\u679c\u60f3\u8981\u66f4\u6e05\u695a\u8868\u9054\u6bcf\u500b\u8cc7\u6599\u9ede\u6240\u5c0d\u61c9\u7684\u985e\u5225\uff0c\u53ef\u4ee5\u518d\u52a0\u4e0a color \u4e26\u6307\u5b9a\u7a2e\u985e\u7684\u6b04\u4f4d\u5373\u6703\u5c07\u6240\u6709\u8cc7\u6599\u81ea\u52d5\u5206\u6210\u4e09\u985e\u3002\u6b64\u5916\u6211\u5011\u4e5f\u80fd\u5920\u8a2d\u5b9a\u6ed1\u9f20\u79fb\u5230\u8cc7\u6599\u9ede\u4e0a\u6240\u986f\u793a\u7684\u8cc7\u8a0a\uff0c\u900f\u904e hover_data \u4e26\u7d66\u4e88\u6307\u5b9a\u6b04\u4f4d\u5373\u53ef\u770b\u5230\u8f38\u51fa\u3002 fig = px . scatter ( df_data , x = \"sepal_width\" , y = \"sepal_length\" , color = \"species\" , hover_data = [ 'petal_length' , 'petal_width' ]) HTML ( fig . to_html ()) Plotly Express \u4e5f\u63d0\u4f9b\u4e09\u7dad\u7684\u8996\u89ba\u5316\uff0c\u6b64\u5916\u4f7f\u7528\u8005\u4e5f\u80fd\u5920\u904e\u63a7\u5236\u8b8a\u7248\u81ea\u7531\u7684\u653e\u5927\u8207\u7e2e\u5c0f\u751a\u81f3\u65cb\u8f49\u3002\u4e0b\u5716\u7bc4\u4f8b\u4e2d\u6211\u5011\u5c07 x \u8ef8\u8a2d\u5b9a\u82b1\u843c\u5bec\u5ea6\uff0cy \u8ef8\u8a2d\u5b9a\u82b1\u843c\u9577\u5ea6\uff0cz \u8ef8\u8a2d\u5b9a\u82b1\u74e3\u5bec\u5ea6\u3002\u6b64\u5916 size \u53ef\u4ee5\u63a7\u5236\u6bcf\u4e00\u500b\u8cc7\u6599\u9ede\u7684\u5927\u5c0f\uff0c\u9019\u88e1\u63a1\u7528\u82b1\u74e3\u7684\u9577\u5ea6\u505a\u70ba\u6bcf\u500b\u8cc7\u6599\u9ede\u5927\u5c0f\u7684\u4f9d\u64da\u3002\u56e0\u6b64\u5f9e\u9019\u500b\u7acb\u9ad4\u7a7a\u9593\u53ef\u4ee5\u767c\u73fe\u5f9e\u82b1\u74e3\u9577\u5ea6\u5c0d\u65bc\u82b1\u7684\u7a2e\u985e\u6709\u5f88\u5f37\u7684\u95dc\u806f\u6027\u3002 fig = px . scatter_3d ( df_data , x = \"sepal_width\" , y = \"sepal_length\" , z = \"petal_width\" , color = \"species\" , size = 'petal_length' ) HTML ( fig . 
to_html ())","title":"\u6563\u4f48\u5716"},{"location":"22.Plotly-Express/#_6","text":"\u6211\u5011\u53ef\u4ee5\u900f\u904e\u7bb1\u5f62\u5716\u9032\u884c\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u6cc1\u5206\u6790\u3002\u5f9e\u8996\u89ba\u5316\u5716\u4e2d\u53ef\u4ee5\u6e05\u695a\u5730\u77e5\u9053\u82b1\u843c\u7684\u5bec\u5ea6\u7bc4\u570d\u4ecb\u65bc 2~4.5 \u4e4b\u9593\uff0c\u4ee5\u53ca\u56db\u5206\u4f4d\u6578\u548c\u96e2\u7fa4\u503c\u7684\u8a0a\u606f\u3002 fig = px . box ( df_data , y = \"sepal_width\" ) HTML ( fig . to_html ()) \u9664\u6b64\u4e4b\u5916\u6211\u5011\u80fd\u5920\u66f4\u8fd1\u4e00\u6b65\u7684\u5206\u6790\u82b1\u843c\u5bec\u5ea6\u5c0d\u65bc\u6bcf\u500b\u54c1\u7a2e\u7684\u5206\u5e03\u72c0\u6cc1\u3002 fig = px . box ( df_data , x = \"species\" , y = \"sepal_width\" , color = \"species\" ) HTML ( fig . to_html ())","title":"\u7bb1\u5f62\u5716"},{"location":"22.Plotly-Express/#_7","text":"\u8907\u5408\u578b\u8996\u89ba\u5316\u65b9\u5f0f\u53ef\u4ee5\u540c\u6642\u9810\u89bd\u5169\u500b\u8b8a\u6578\u9593\u7684\u6563\u4f48\u5716\u8207\u7bb1\u578b\u5716\u95dc\u4fc2\u3002\u4e3b\u8981\u662f\u900f\u904e marginal_y \u8207 marginal_x \u8a2d\u7f6e\u6a6b\u8ef8\u8207\u7e31\u8ef8\u7684\u8996\u89ba\u5316\u65b9\u5f0f\uff0c\u56e0\u6b64\u5728\u4e00\u500b\u5716\u8868\u4e2d\u53ef\u4ee5\u7d50\u5408\u5169\u7a2e\u8996\u89ba\u5316\u3002\u6b64\u5916 trendline \u53ef\u4ee5\u70ba\u6563\u4f48\u5716\u7e6a\u88fd\u8da8\u52e2\u7dda\uff0c\u8a2d\u7f6e ols \u6703\u63a1\u7528\u6700\u5c0f\u5e73\u65b9\u6cd5\u4f4d\u6578\u64da\u5efa\u7acb\u4e00\u500b\u7dda\u6027\u8ff4\u6b78\u3002 fig = px . scatter ( df_data , x = \"sepal_width\" , y = \"sepal_length\" , color = \"species\" , marginal_y = \"box\" , marginal_x = \"box\" , trendline = \"ols\" , template = \"simple_white\" ) HTML ( fig . to_html ()) \u4e5f\u53ef\u4ee5\u8a66\u8457\u5c07\u6a6b\u8ef8\u8207\u7e31\u8ef8\u6539\u6210\u76f4\u65b9\u5716\u3002 fig = px . 
scatter ( df_data , x = \"sepal_width\" , y = \"sepal_length\" , color = \"species\" , marginal_y = \"histogram\" , marginal_x = \"histogram\" , trendline = \"ols\" , template = \"simple_white\" ) HTML ( fig . to_html ())","title":"\u8907\u5408\u578b\u8996\u89ba\u5316\u6280\u5de7"},{"location":"22.Plotly-Express/#_8","text":"","title":"\u532f\u51fa\u5716\u7247"},{"location":"22.Plotly-Express/#_9","text":"\u76f4\u63a5\u9ede\u9078\u63a7\u5236\u9762\u677f\u7684\u76f8\u6a5f\u5716\u793a (Download plot as a png) \u53ef\u4ee5\u7acb\u5373\u4e0b\u8f09\u5716\u7247\u3002","title":"\u65b9\u6cd5\u4e00"},{"location":"22.Plotly-Express/#_10","text":"\u9996\u5148\u8981\u5b89\u88dd kaleido \u624d\u80fd\u532f\u51fa Plotly Express \u7684\u975c\u614b\u5716\u7247\u3002 !pip install kaleido \u532f\u51fa\u975c\u614b\u5716\u7247 fig . write_image ( \"./demo.png\" ) \u532f\u51fa\u7db2\u9801\u683c\u5f0f\uff0c\u4fdd\u7559\u4e92\u52d5\u5f62\u5f0f fig . write_html ( \"./demo.html\" )","title":"\u65b9\u6cd5\u4e8c"},{"location":"22.Plotly-Express/#reference","text":"Plotly Express API Doc Plotly Express GitHub \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/","text":"[Day 23] \u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406 \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u8cc7\u6599\u7279\u5fb5\u89c0\u5bdf\u8207\u96e2\u7fa4\u503c\u5206\u6790 \u6aa2\u8996\u8cc7\u6599\u7684\u5206\u5e03\u72c0\u614b \u504f\u5ea6 (Skewness) \u5cf0\u5ea6 (Kurtosis) \u4fee\u6b63\u7279\u5fb5\u504f\u5ea6\u7684\u65b9\u6cd5 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 \u8cc7\u6599\u524d\u8655\u7406 (Data 
Preprocessing)\uff0c\u662f\u6a5f\u5668\u5b78\u7fd2\u4e2d\u6700\u91cd\u8981\u7684\u4e00\u90e8\u5206\u3002\u4eca\u65e5\u7684\u5167\u5bb9\u53ef\u5206\u70ba\u5169\u90e8\u4efd\uff0c\u524d\u534a\u90e8\u4efd\u7b97\u662f\u4e00\u4e9b\u5c0d\u8cc7\u6599\u7684\u89c0\u5bdf\u8207\u5206\u6790\uff0c\u5f8c\u534a\u90e8\u4e3b\u8981\u662f\u91dd\u5c0d\u7279\u5fb5 x \u9032\u884c\u7d71\u8a08\u65b9\u6cd5\u7684\u8cc7\u6599\u5206\u5e03\u89c0\u5bdf\u4ee5\u53ca\u5982\u4f55\u4fee\u6b63\u8cc7\u6599\u55ae\u5cf0\u504f\u5de6\u548c\u504f\u53f3\u7684\u5e38\u898b\u65b9\u6cd5\u3002 \u8f09\u5165\u8cc7\u6599 \u5728\u4eca\u65e5\u7684\u7bc4\u4f8b\u4e2d\u6211\u5011\u63a1\u7528\u6ce2\u58eb\u9813\u623f\u50f9\u9810\u6e2c\u7684\u8cc7\u6599\u96c6\u3002\u6b64\u8cc7\u6599\u96c6\u5171\u6709 506 \u7b46\u8cc7\u6599\u3002\u5176\u4e2d\u6211\u5011\u6311\u9078\u5169\u500b\u7279\u5fb5\u4f86\u9032\u884c\u793a\u7bc4\uff0c\u5206\u5225\u6709 LSTAT: \u5340\u57df\u4e2d\u88ab\u8a8d\u70ba\u662f\u4f4e\u6536\u5165\u968e\u5c64\u7684\u6bd4\u4f8b\u3001AGE: 1940\u5e74\u4e4b\u524d\u5efa\u6210\u7684\u81ea\u7528\u623f\u5c4b\u6bd4\u4f8b\u3002 import numpy as np import pandas as pd import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_boston # \u8f09\u5165\u8cc7\u6599\u96c6 boston_dataset = load_boston () # \u5c07\u8cc7\u6599\u8f49\u63db\u6210pd.DataFrame\u683c\u5f0f\u3002\u76ee\u6a19\u8f38\u51fa\u662fMEDIV\uff0c\u5269\u4e0b\u7684\u5c31\u662f\u7279\u5fb5\u5373\u70ba\u8f38\u5165\u7279\u5fb5\u3002 boston = pd . DataFrame ( boston_dataset . data , columns = boston_dataset . feature_names ) boston [ 'MEDV' ] = boston_dataset . target boston \u6211\u5011\u53ef\u4ee5\u900f\u904e Pandas \u7684 describe() \u65b9\u6cd5\u5148\u4f86\u67e5\u770b\u6bcf\u500b\u7279\u5fb5\u7684\u5e73\u5747\u6578\u3001\u6a19\u6e96\u5dee\u3001\u56db\u5206\u4f4d\u6578\u4ee5\u53ca\u6700\u5927\u503c\u8207\u6700\u5c0f\u503c\u3002 # \u67e5\u770b\u8cc7\u6599\u5206\u5e03\u72c0\u6cc1 boston . 
describe () \u96e2\u7fa4\u503c\u5206\u6790 \u4ee5 LSTAT \u7279\u5fb5\u8209\u4f8b\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e boxplot \u4f86\u67e5\u770b\u8a72\u7279\u5fb5\u5728 506 \u7b46\u8cc7\u6599\u4e2d\u7684\u5206\u5e03\u72c0\u6cc1\uff0c\u6211\u5011\u53ef\u4ee5\u770b\u51fa\u5e73\u5747\u503c\u7d04 12\uff0c\u6700\u5927\u503c\u63a5\u8fd1 38\uff0c\u6700\u5c0f\u503c\u63a5\u8fd1 2\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u5927\u65bc 32 \u4ee5\u5916\u6709\u591a\u500b\u96f6\u6563\u7684\u6578\u64da\u9ede\uff0c\u9019\u4e9b\u8cc7\u6599\u6211\u5011\u53ef\u4ee5\u4f86\u5206\u6790\u662f\u5426\u70ba\u7570\u5e38\u9ede\u3002\u56e0\u70ba\u9019\u4e9b\u7570\u5e38\u9ede\u6240\u9020\u6210\u7684\u96e2\u7fa4\u503c\u53ef\u80fd\u6703\u9020\u6210\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u6cc1\u56b4\u91cd\u7684\u504f\u79fb\u3002 plt . figure ( figsize = ( 2 , 5 )) plt . boxplot ( boston [ 'LSTAT' ], showmeans = True ) plt . title ( 'LSTAT' ) plt . show () \u504f\u5ea6 & \u5cf0\u5ea6 \u504f\u5ea6 (Skewness) \u504f\u5ea6 (Skewness) \u662f\u7528\u4f86\u8861\u91cf\u8cc7\u6599\u5206\u5e03\u7684\u578b\u614b\uff0c\u540c\u6642\u4e5f\u8aaa\u660e\u8cc7\u6599\u5206\u914d\u4e0d\u5c0d\u7a31\u7684\u7a0b\u5ea6\u3002\u5176\u5224\u5225\u65b9\u5f0f\u5982\u4e0b\uff1a \u53f3\u504f(\u6b63\u504f)\uff0c\u8868\u793a\u6709\u5c11\u6578\u5e7e\u7b46\u8cc7\u6599\u5f88\u5927\uff0c\u6545\u5e73\u5747\u6578>\u4e2d\u4f4d\u6578\uff0c\u6240\u4ee5\u504f\u5ea6>0\u3002 \u504f\u5ea6=0 \u8868\u793a\u8cc7\u6599\u5206\u5e03\u5c0d\u7a31\uff0c\u5448\u9418\u5f62\u5e38\u614b\u5206\u5e03\u3002 \u5de6\u504f(\u8ca0\u504f)\uff0c\u8868\u793a\u6709\u5c11\u6578\u5e7e\u7b46\u8cc7\u6599\u5f88\u5c0f\uff0c\u6545\u5e73\u5747\u6578<\u4e2d\u4f4d\u6578\uff0c\u6240\u4ee5\u504f\u5ea6<0\u3002 \u5cf0\u5ea6 (Kurtosis) \u5cf0\u5ea6 (Kurtossis) 
\u53ef\u4ee5\u53cd\u6620\u8cc7\u6599\u7684\u5206\u5e03\u5f62\u72c0\u3002\u4f8b\u5982\u8a72\u8cc7\u6599\u662f\u5426\u6bd4\u8f03\u9ad8\u8073\u6216\u662f\u6241\u5e73\u7684\u5f62\u72c0\u3002\u5176\u5224\u5225\u65b9\u5f0f\u5982\u4e0b\uff1a \u5cf0\u5ea6>0 \u8868\u793a\u8cc7\u6599\u5448\u73fe\u9ad8\u5cfd\u5cf0\u3002 \u5cf0\u5ea6=0 \u8868\u793a\u8cc7\u6599\u5448\u73fe\u5e38\u614b\u5cf0\u3002 \u5cf0\u5ea6<0 \u8868\u793a\u8cc7\u6599\u5448\u73fe\u4f4e\u6f64\u5cf0\u3002 \u5206\u5e03\u72c0\u614b LSTAT \u7279\u5fb5\u89c0\u5bdf \u6211\u5011\u53ef\u4ee5\u767c\u73fe LSTAT \u7279\u5fb5\u5448\u73fe\u53f3\u504f\u3002\u900f\u904e Pandas \u8a08\u7b97\u8a72\u7279\u5fb5\u7684\u504f\u5ea6\u8207\u5cf0\u5ea6\u3002\u7531\u7d50\u679c\u53ef\u4ee5\u5f97\u77e5\u504f\u5ea6 0.91>0 \u5448\u53f3\u504f\uff0c\u800c\u5cf0\u5ea6 0.49>0 \u5448\u73fe\u9ad8\u5cfd\u5cf0\u5f62\u72c0\u3002 # \u4f7f\u7528\u7684\u8cc7\u6599\u662f LSTAT: \u5340\u57df\u4e2d\u88ab\u8a8d\u70ba\u662f\u4f4e\u6536\u5165\u968e\u5c64\u7684\u6bd4\u4f8b # skewness \u8207 kurtosis skewness = round ( boston [ 'LSTAT' ] . skew (), 2 ) kurtosis = round ( boston [ 'LSTAT' ] . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( boston [ 'LSTAT' ], kde = True ) plt . show () AGE \u7279\u5fb5\u89c0\u5bdf \u6211\u5011\u53ef\u4ee5\u767c\u73fe AGE \u7279\u5fb5\u5448\u73fe\u5de6\u504f\u3002\u900f\u904e Pandas \u8a08\u7b97\u8a72\u7279\u5fb5\u7684\u504f\u5ea6\u8207\u5cf0\u5ea6\u3002\u7531\u7d50\u679c\u53ef\u4ee5\u5f97\u77e5\u504f\u5ea6 -0.6<0 \u5448\u5de6\u504f\uff0c\u800c\u5cf0\u5ea6 -0.97<0 \u5448\u73fe\u4f4e\u6f64\u5cf0\u5f62\u72c0\u3002 # \u4f7f\u7528\u7684\u8cc7\u6599\u662f AGE: 1940\u5e74\u4e4b\u524d\u5efa\u6210\u7684\u81ea\u7528\u623f\u5c4b\u6bd4\u4f8b #skewness \u8207 kurtosis skewness = round ( boston [ 'AGE' ] . skew (), 2 ) kurtosis = round ( boston [ 'AGE' ] . 
kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( boston [ 'AGE' ], kde = True ) plt . show () \u4fee\u6b63\u8cc7\u6599\u504f\u614b\u7684\u65b9\u6cd5 \u5728\u6578\u5b78\u7d71\u8a08\u6216\u662f\u6a5f\u5668\u5b78\u7fd2\u4e2d\u6211\u5011\u90fd\u6703\u63d0\u51fa\u5047\u8a2d\uff0c\u524d\u63d0\u662f\u8cc7\u6599\u6a23\u672c\u662f\u5177\u6709\u5e38\u614b\u5206\u4f48\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e\u525b\u525b\u6240\u8b1b\u7684\u504f\u5ea6\u8207\u5cf0\u5ea6\u4f86\u8a55\u4f30\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u614b\uff0c\u6216\u662f\u900f\u904e\u76f4\u65b9\u5716\u8207\u6838\u5bc6\u5ea6\u4f30\u8a08\u8996\u89ba\u5316\u67e5\u770b\u8cc7\u6599\u5206\u5e03\u3002\u7576\u8cc7\u6599\u5448\u73fe\u55ae\u5cf0\u504f\u659c\u6642\uff0c\u6211\u5011\u6703\u900f\u904e\u4e00\u4e9b\u8cc7\u6599\u8f49\u63db\u6280\u5de7\uff0c\u8b93\u6240\u6709\u8cc7\u6599\u80fd\u5920\u4fee\u6b63\u56de\u5e38\u614b\u5206\u4f48\u3002\u4ee5\u4e0b\u6574\u5e7e\u5e7e\u500b\u5e38\u898b\u7684\u4fee\u6b63\u7279\u5fb5\u504f\u5ea6\u7684\u65b9\u6cd5\uff1a \u5c0d\u6578\u8f49\u63db (\u8cc7\u6599\u4e0d\u80fd\u67090\u6216\u8ca0\u6578) \u5e73\u65b9\u6839\u8f49\u63db (\u8cc7\u6599\u4e0d\u80fd\u662f\u8ca0\u6578) \u7acb\u65b9\u6839\u8f49\u63db \u6b21\u65b9\u8f49\u63db (\u53ea\u80fd\u8655\u7406\u5de6\u504f) Box-Cox \u8f49\u63db \u79fb\u9664\u96e2\u7fa4\u503c \u5c0d\u6578\u8f49\u63db \u56e0\u70ba\u8cc7\u6599\u578b\u614b\u5de6\u504f\uff0c\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u900f\u904e\u53d6\u5c0d\u6578\u4f86\u5c07\u8cc7\u6599\u62c9\u56de\u4f7f\u70ba\u66f4\u96c6\u4e2d\u3002 transform_data = np . log ( boston [ 'LSTAT' ]) # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( transform_data , kde = True ) plt . 
show () \u5e73\u65b9\u6839\u8f49\u63db transform_data = boston [ 'LSTAT' ] ** ( 1 / 2 ) # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( transform_data , kde = True ) plt . show () \u7acb\u65b9\u6839\u8f49\u63db transform_data = boston [ 'LSTAT' ] ** ( 1 / 3 ) # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( transform_data , kde = True ) plt . show () \u6b21\u65b9\u8f49\u63db \u6b21\u65b9\u8f49\u63db\u50c5\u80fd\u4f7f\u7528\u5728\u504f\u5de6\u7684\u8cc7\u6599\u4e0a\u3002 transform_data = np . power ( boston [ 'AGE' ], 2 ) # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( transform_data , kde = True ) plt . show () \u539f\u672c\u7684\u8cc7\u6599\u5206\u5e03\u4f4e\u6f64\u5cf0\u4e14\u6709\u9ede\u96d9\u5cf0\u7684\u8da8\u52e2\uff0c\u56e0\u6b64\u8f49\u63db\u51fa\u4f86\u6703\u6709\u5169\u5ea7\u5c71\u7684\u611f\u89ba\u3002 Box-Cox \u8f49\u63db from scipy.stats import boxcox transform_data , lam = boxcox ( boston [ 'LSTAT' ]) transform_data = pd . DataFrame ( transform_data , columns = [ 'LSTAT' ])[ 'LSTAT' ] # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( transform_data , kde = True ) plt . 
show () \u79fb\u9664\u96e2\u7fa4\u503c \u5728 Q3\uff0b1.5IQR\uff08\u56db\u5206\u4f4d\u8ddd\uff09\u548c Q1-1.5IQR \u8655\u756b\u5169\u689d\u8207\u4e2d\u4f4d\u7dda\u4e00\u6a23\u7684\u7dda\u6bb5\uff0c\u9019\u5169\u689d\u7dda\u6bb5\u70ba\u7570\u5e38\u503c\u622a\u65b7\u9ede\uff0c\u7a31\u5176\u70ba\u5167\u9650\u3002\u5728 Q3\uff0b3IQR \u548cQ1\uff0d3IQR \u8655\u756b\u5169\u689d\u7dda\u6bb5\u7a31\u5176\u70ba\u5916\u9650\u3002\u8655\u65bc\u5167\u9650\u4ee5\u5916\u4f4d\u7f6e\u7684\u9ede\u8868\u793a\u7684\u6578\u64da\u90fd\u662f\u7570\u5e38\u503c\uff0c\u5176\u4e2d\u5728\u5167\u9650\u8207\u5916\u9650\u4e4b\u9593\u7684\u7570\u5e38\u503c\u70ba\u6eab\u548c\u7684\u7570\u5e38\u503c\uff08mild outliers\uff09\uff0c\u5728\u5916\u9650\u4ee5\u5916\u7684\u70ba\u6975\u7aef\u7684\u7570\u5e38\u503c (extreme outliers)\u3002 # \u5c07\u6240\u6709\u7279\u5fb5\u8d85\u51fa1.5\u500dIQR\u7684\u6982\u5ff5\u5c07\u9019\u4e9bOutlier\u5148\u53bb\u6389\uff0c\u907f\u514d\u5c0dModel\u9020\u6210\u5f71\u97ff\u3002 print ( \"Shape Of The Before Ouliers: \" , boston [ 'LSTAT' ] . shape ) n = 1.5 #IQR = Q3-Q1 IQR = np . percentile ( boston [ 'LSTAT' ], 75 ) - np . percentile ( boston [ 'LSTAT' ], 25 ) # outlier = Q3 + n*IQR transform_data = boston [ boston [ 'LSTAT' ] < np . percentile ( boston [ 'LSTAT' ], 75 ) + n * IQR ] # outlier = Q1 - n*IQR transform_data = transform_data [ transform_data [ 'LSTAT' ] > np . percentile ( transform_data [ 'LSTAT' ], 25 ) - n * IQR ][ 'LSTAT' ] print ( \"Shape Of The After Ouliers: \" , transform_data . 
shape ) \u6211\u5011\u5fc5\u9808\u5c07\u8d85\u51fa 1.5 \u500d\u7684\u6975\u7aef\u7570\u5e38\u503c\u6e05\u6389\u3002\u5171\u6709 7 \u7b46\u8cc7\u6599\u88ab\u79fb\u9664\u6389\u3002 \u8f38\u51fa\u7d50\u679c\uff1a Shape Of The Before Ouliers: (506,) Shape Of The After Ouliers: (499,) \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 23] \u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#day-23","text":"","title":"[Day 23] \u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_1","text":"\u8cc7\u6599\u7279\u5fb5\u89c0\u5bdf\u8207\u96e2\u7fa4\u503c\u5206\u6790 \u6aa2\u8996\u8cc7\u6599\u7684\u5206\u5e03\u72c0\u614b \u504f\u5ea6 (Skewness) \u5cf0\u5ea6 (Kurtosis) \u4fee\u6b63\u7279\u5fb5\u504f\u5ea6\u7684\u65b9\u6cd5 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_2","text":"\u8cc7\u6599\u524d\u8655\u7406 (Data Preprocessing)\uff0c\u662f\u6a5f\u5668\u5b78\u7fd2\u4e2d\u6700\u91cd\u8981\u7684\u4e00\u90e8\u5206\u3002\u4eca\u65e5\u7684\u5167\u5bb9\u53ef\u5206\u70ba\u5169\u90e8\u4efd\uff0c\u524d\u534a\u90e8\u4efd\u7b97\u662f\u4e00\u4e9b\u5c0d\u8cc7\u6599\u7684\u89c0\u5bdf\u8207\u5206\u6790\uff0c\u5f8c\u534a\u90e8\u4e3b\u8981\u662f\u91dd\u5c0d\u7279\u5fb5 x 
\u9032\u884c\u7d71\u8a08\u65b9\u6cd5\u7684\u8cc7\u6599\u5206\u5e03\u89c0\u5bdf\u4ee5\u53ca\u5982\u4f55\u4fee\u6b63\u8cc7\u6599\u55ae\u5cf0\u504f\u5de6\u548c\u504f\u53f3\u7684\u5e38\u898b\u65b9\u6cd5\u3002","title":"\u524d\u8a00"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_3","text":"\u5728\u4eca\u65e5\u7684\u7bc4\u4f8b\u4e2d\u6211\u5011\u63a1\u7528\u6ce2\u58eb\u9813\u623f\u50f9\u9810\u6e2c\u7684\u8cc7\u6599\u96c6\u3002\u6b64\u8cc7\u6599\u96c6\u5171\u6709 506 \u7b46\u8cc7\u6599\u3002\u5176\u4e2d\u6211\u5011\u6311\u9078\u5169\u500b\u7279\u5fb5\u4f86\u9032\u884c\u793a\u7bc4\uff0c\u5206\u5225\u6709 LSTAT: \u5340\u57df\u4e2d\u88ab\u8a8d\u70ba\u662f\u4f4e\u6536\u5165\u968e\u5c64\u7684\u6bd4\u4f8b\u3001AGE: 1940\u5e74\u4e4b\u524d\u5efa\u6210\u7684\u81ea\u7528\u623f\u5c4b\u6bd4\u4f8b\u3002 import numpy as np import pandas as pd import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_boston # \u8f09\u5165\u8cc7\u6599\u96c6 boston_dataset = load_boston () # \u5c07\u8cc7\u6599\u8f49\u63db\u6210pd.DataFrame\u683c\u5f0f\u3002\u76ee\u6a19\u8f38\u51fa\u662fMEDIV\uff0c\u5269\u4e0b\u7684\u5c31\u662f\u7279\u5fb5\u5373\u70ba\u8f38\u5165\u7279\u5fb5\u3002 boston = pd . DataFrame ( boston_dataset . data , columns = boston_dataset . feature_names ) boston [ 'MEDV' ] = boston_dataset . target boston \u6211\u5011\u53ef\u4ee5\u900f\u904e Pandas \u7684 describe() \u65b9\u6cd5\u5148\u4f86\u67e5\u770b\u6bcf\u500b\u7279\u5fb5\u7684\u5e73\u5747\u6578\u3001\u6a19\u6e96\u5dee\u3001\u56db\u5206\u4f4d\u6578\u4ee5\u53ca\u6700\u5927\u503c\u8207\u6700\u5c0f\u503c\u3002 # \u67e5\u770b\u8cc7\u6599\u5206\u5e03\u72c0\u6cc1 boston . 
describe ()","title":"\u8f09\u5165\u8cc7\u6599"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_4","text":"\u4ee5 LSTAT \u7279\u5fb5\u8209\u4f8b\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e boxplot \u4f86\u67e5\u770b\u8a72\u7279\u5fb5\u5728 506 \u7b46\u8cc7\u6599\u4e2d\u7684\u5206\u5e03\u72c0\u6cc1\uff0c\u6211\u5011\u53ef\u4ee5\u770b\u51fa\u5e73\u5747\u503c\u7d04 12\uff0c\u6700\u5927\u503c\u63a5\u8fd1 38\uff0c\u6700\u5c0f\u503c\u63a5\u8fd1 2\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u5927\u65bc 32 \u4ee5\u5916\u6709\u591a\u500b\u96f6\u6563\u7684\u6578\u64da\u9ede\uff0c\u9019\u4e9b\u8cc7\u6599\u6211\u5011\u53ef\u4ee5\u4f86\u5206\u6790\u662f\u5426\u70ba\u7570\u5e38\u9ede\u3002\u56e0\u70ba\u9019\u4e9b\u7570\u5e38\u9ede\u6240\u9020\u6210\u7684\u96e2\u7fa4\u503c\u53ef\u80fd\u6703\u9020\u6210\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u6cc1\u56b4\u91cd\u7684\u504f\u79fb\u3002 plt . figure ( figsize = ( 2 , 5 )) plt . boxplot ( boston [ 'LSTAT' ], showmeans = True ) plt . title ( 'LSTAT' ) plt . 
show ()","title":"\u96e2\u7fa4\u503c\u5206\u6790"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_5","text":"","title":"\u504f\u5ea6 & \u5cf0\u5ea6"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#skewness","text":"\u504f\u5ea6 (Skewness) \u662f\u7528\u4f86\u8861\u91cf\u8cc7\u6599\u5206\u5e03\u7684\u578b\u614b\uff0c\u540c\u6642\u4e5f\u8aaa\u660e\u8cc7\u6599\u5206\u914d\u4e0d\u5c0d\u7a31\u7684\u7a0b\u5ea6\u3002\u5176\u5224\u5225\u65b9\u5f0f\u5982\u4e0b\uff1a \u53f3\u504f(\u6b63\u504f)\uff0c\u8868\u793a\u6709\u5c11\u6578\u5e7e\u7b46\u8cc7\u6599\u5f88\u5927\uff0c\u6545\u5e73\u5747\u6578>\u4e2d\u4f4d\u6578\uff0c\u6240\u4ee5\u504f\u5ea6>0\u3002 \u504f\u5ea6=0 \u8868\u793a\u8cc7\u6599\u5206\u5e03\u5c0d\u7a31\uff0c\u5448\u9418\u5f62\u5e38\u614b\u5206\u5e03\u3002 \u5de6\u504f(\u8ca0\u504f)\uff0c\u8868\u793a\u6709\u5c11\u6578\u5e7e\u7b46\u8cc7\u6599\u5f88\u5c0f\uff0c\u6545\u5e73\u5747\u6578<\u4e2d\u4f4d\u6578\uff0c\u6240\u4ee5\u504f\u5ea6<0\u3002","title":"\u504f\u5ea6 (Skewness)"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#kurtosis","text":"\u5cf0\u5ea6 (Kurtossis) \u53ef\u4ee5\u53cd\u6620\u8cc7\u6599\u7684\u5206\u5e03\u5f62\u72c0\u3002\u4f8b\u5982\u8a72\u8cc7\u6599\u662f\u5426\u6bd4\u8f03\u9ad8\u8073\u6216\u662f\u6241\u5e73\u7684\u5f62\u72c0\u3002\u5176\u5224\u5225\u65b9\u5f0f\u5982\u4e0b\uff1a \u5cf0\u5ea6>0 \u8868\u793a\u8cc7\u6599\u5448\u73fe\u9ad8\u5cfd\u5cf0\u3002 \u5cf0\u5ea6=0 \u8868\u793a\u8cc7\u6599\u5448\u73fe\u5e38\u614b\u5cf0\u3002 \u5cf0\u5ea6<0 \u8868\u793a\u8cc7\u6599\u5448\u73fe\u4f4e\u6f64\u5cf0\u3002","title":"\u5cf0\u5ea6 (Kurtosis)"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_6","text":"","title":"\u5206\u5e03\u72c0\u614b"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#lstat","text":"\u6211\u5011\u53ef\u4ee5\u767c\u73fe LSTAT \u7279\u5fb5\u5448\u73fe\u53f3\u504f\u3002\u900f\u904e Pandas 
\u8a08\u7b97\u8a72\u7279\u5fb5\u7684\u504f\u5ea6\u8207\u5cf0\u5ea6\u3002\u7531\u7d50\u679c\u53ef\u4ee5\u5f97\u77e5\u504f\u5ea6 0.91>0 \u5448\u53f3\u504f\uff0c\u800c\u5cf0\u5ea6 0.49>0 \u5448\u73fe\u9ad8\u5cfd\u5cf0\u5f62\u72c0\u3002 # \u4f7f\u7528\u7684\u8cc7\u6599\u662f LSTAT: \u5340\u57df\u4e2d\u88ab\u8a8d\u70ba\u662f\u4f4e\u6536\u5165\u968e\u5c64\u7684\u6bd4\u4f8b # skewness \u8207 kurtosis skewness = round ( boston [ 'LSTAT' ] . skew (), 2 ) kurtosis = round ( boston [ 'LSTAT' ] . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( boston [ 'LSTAT' ], kde = True ) plt . show ()","title":"LSTAT \u7279\u5fb5\u89c0\u5bdf"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#age","text":"\u6211\u5011\u53ef\u4ee5\u767c\u73fe AGE \u7279\u5fb5\u5448\u73fe\u5de6\u504f\u3002\u900f\u904e Pandas \u8a08\u7b97\u8a72\u7279\u5fb5\u7684\u504f\u5ea6\u8207\u5cf0\u5ea6\u3002\u7531\u7d50\u679c\u53ef\u4ee5\u5f97\u77e5\u504f\u5ea6 -0.6<0 \u5448\u5de6\u504f\uff0c\u800c\u5cf0\u5ea6 -0.97<0 \u5448\u73fe\u4f4e\u6f64\u5cf0\u5f62\u72c0\u3002 # \u4f7f\u7528\u7684\u8cc7\u6599\u662f AGE: 1940\u5e74\u4e4b\u524d\u5efa\u6210\u7684\u81ea\u7528\u623f\u5c4b\u6bd4\u4f8b #skewness \u8207 kurtosis skewness = round ( boston [ 'AGE' ] . skew (), 2 ) kurtosis = round ( boston [ 'AGE' ] . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( boston [ 'AGE' ], kde = True ) plt . 
show ()","title":"AGE \u7279\u5fb5\u89c0\u5bdf"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_7","text":"\u5728\u6578\u5b78\u7d71\u8a08\u6216\u662f\u6a5f\u5668\u5b78\u7fd2\u4e2d\u6211\u5011\u90fd\u6703\u63d0\u51fa\u5047\u8a2d\uff0c\u524d\u63d0\u662f\u8cc7\u6599\u6a23\u672c\u662f\u5177\u6709\u5e38\u614b\u5206\u4f48\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e\u525b\u525b\u6240\u8b1b\u7684\u504f\u5ea6\u8207\u5cf0\u5ea6\u4f86\u8a55\u4f30\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u614b\uff0c\u6216\u662f\u900f\u904e\u76f4\u65b9\u5716\u8207\u6838\u5bc6\u5ea6\u4f30\u8a08\u8996\u89ba\u5316\u67e5\u770b\u8cc7\u6599\u5206\u5e03\u3002\u7576\u8cc7\u6599\u5448\u73fe\u55ae\u5cf0\u504f\u659c\u6642\uff0c\u6211\u5011\u6703\u900f\u904e\u4e00\u4e9b\u8cc7\u6599\u8f49\u63db\u6280\u5de7\uff0c\u8b93\u6240\u6709\u8cc7\u6599\u80fd\u5920\u4fee\u6b63\u56de\u5e38\u614b\u5206\u4f48\u3002\u4ee5\u4e0b\u6574\u7406\u5e7e\u500b\u5e38\u898b\u7684\u4fee\u6b63\u7279\u5fb5\u504f\u5ea6\u7684\u65b9\u6cd5\uff1a \u5c0d\u6578\u8f49\u63db (\u8cc7\u6599\u4e0d\u80fd\u67090\u6216\u8ca0\u6578) \u5e73\u65b9\u6839\u8f49\u63db (\u8cc7\u6599\u4e0d\u80fd\u662f\u8ca0\u6578) \u7acb\u65b9\u6839\u8f49\u63db \u6b21\u65b9\u8f49\u63db (\u53ea\u80fd\u8655\u7406\u5de6\u504f) Box-Cox \u8f49\u63db \u79fb\u9664\u96e2\u7fa4\u503c","title":"\u4fee\u6b63\u8cc7\u6599\u504f\u614b\u7684\u65b9\u6cd5"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_8","text":"\u56e0\u70ba\u8cc7\u6599\u578b\u614b\u53f3\u504f\uff0c\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u900f\u904e\u53d6\u5c0d\u6578\u4f86\u5c07\u8cc7\u6599\u62c9\u56de\u4f7f\u70ba\u66f4\u96c6\u4e2d\u3002 transform_data = np . log ( boston [ 'LSTAT' ]) # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . 
histplot ( transform_data , kde = True ) plt . show ()","title":"\u5c0d\u6578\u8f49\u63db"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_9","text":"transform_data = boston [ 'LSTAT' ] ** ( 1 / 2 ) # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( transform_data , kde = True ) plt . show ()","title":"\u5e73\u65b9\u6839\u8f49\u63db"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_10","text":"transform_data = boston [ 'LSTAT' ] ** ( 1 / 3 ) # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( transform_data , kde = True ) plt . show ()","title":"\u7acb\u65b9\u6839\u8f49\u63db"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_11","text":"\u6b21\u65b9\u8f49\u63db\u50c5\u80fd\u4f7f\u7528\u5728\u504f\u5de6\u7684\u8cc7\u6599\u4e0a\u3002 transform_data = np . power ( boston [ 'AGE' ], 2 ) # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( transform_data , kde = True ) plt . 
show () \u539f\u672c\u7684\u8cc7\u6599\u5206\u5e03\u4f4e\u6f64\u5cf0\u4e14\u6709\u9ede\u96d9\u5cf0\u7684\u8da8\u52e2\uff0c\u56e0\u6b64\u8f49\u63db\u51fa\u4f86\u6703\u6709\u5169\u5ea7\u5c71\u7684\u611f\u89ba\u3002","title":"\u6b21\u65b9\u8f49\u63db"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#box-cox","text":"from scipy.stats import boxcox transform_data , lam = boxcox ( boston [ 'LSTAT' ]) transform_data = pd . DataFrame ( transform_data , columns = [ 'LSTAT' ])[ 'LSTAT' ] # skewness \u8207 kurtosis skewness = round ( transform_data . skew (), 2 ) kurtosis = round ( transform_data . kurt (), 2 ) print ( f \"\u504f\u5ea6(Skewness): { skewness } , \u5cf0\u5ea6(Kurtosis): { kurtosis } \" ) # \u7e6a\u88fd\u5206\u5e03\u5716 sns . histplot ( transform_data , kde = True ) plt . show ()","title":"Box-Cox \u8f49\u63db"},{"location":"23.\u8cc7\u6599\u5206\u5e03\u8207\u96e2\u7fa4\u503c\u8655\u7406/#_12","text":"\u5728 Q3\uff0b1.5IQR\uff08\u56db\u5206\u4f4d\u8ddd\uff09\u548c Q1-1.5IQR \u8655\u756b\u5169\u689d\u8207\u4e2d\u4f4d\u7dda\u4e00\u6a23\u7684\u7dda\u6bb5\uff0c\u9019\u5169\u689d\u7dda\u6bb5\u70ba\u7570\u5e38\u503c\u622a\u65b7\u9ede\uff0c\u7a31\u5176\u70ba\u5167\u9650\u3002\u5728 Q3\uff0b3IQR \u548cQ1\uff0d3IQR \u8655\u756b\u5169\u689d\u7dda\u6bb5\u7a31\u5176\u70ba\u5916\u9650\u3002\u8655\u65bc\u5167\u9650\u4ee5\u5916\u4f4d\u7f6e\u7684\u9ede\u8868\u793a\u7684\u6578\u64da\u90fd\u662f\u7570\u5e38\u503c\uff0c\u5176\u4e2d\u5728\u5167\u9650\u8207\u5916\u9650\u4e4b\u9593\u7684\u7570\u5e38\u503c\u70ba\u6eab\u548c\u7684\u7570\u5e38\u503c\uff08mild outliers\uff09\uff0c\u5728\u5916\u9650\u4ee5\u5916\u7684\u70ba\u6975\u7aef\u7684\u7570\u5e38\u503c (extreme outliers)\u3002 # \u5c07\u6240\u6709\u7279\u5fb5\u8d85\u51fa1.5\u500dIQR\u7684\u6982\u5ff5\u5c07\u9019\u4e9bOutlier\u5148\u53bb\u6389\uff0c\u907f\u514d\u5c0dModel\u9020\u6210\u5f71\u97ff\u3002 print ( \"Shape Of The Before Ouliers: \" , boston [ 'LSTAT' ] . 
shape ) n = 1.5 #IQR = Q3-Q1 IQR = np . percentile ( boston [ 'LSTAT' ], 75 ) - np . percentile ( boston [ 'LSTAT' ], 25 ) # outlier = Q3 + n*IQR transform_data = boston [ boston [ 'LSTAT' ] < np . percentile ( boston [ 'LSTAT' ], 75 ) + n * IQR ] # outlier = Q1 - n*IQR transform_data = transform_data [ transform_data [ 'LSTAT' ] > np . percentile ( transform_data [ 'LSTAT' ], 25 ) - n * IQR ][ 'LSTAT' ] print ( \"Shape Of The After Ouliers: \" , transform_data . shape ) \u6211\u5011\u5fc5\u9808\u5c07\u8d85\u51fa 1.5 \u500d\u7684\u6975\u7aef\u7570\u5e38\u503c\u6e05\u6389\u3002\u5171\u6709 7 \u7b46\u8cc7\u6599\u88ab\u79fb\u9664\u6389\u3002 \u8f38\u51fa\u7d50\u679c\uff1a Shape Of The Before Ouliers: (506,) Shape Of The After Ouliers: (499,) \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u79fb\u9664\u96e2\u7fa4\u503c"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/","text":"[Day 24] \u6a5f\u5668\u5b78\u7fd2 - \u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408 \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u5982\u4f55\u9078\u64c7\u6700\u4f73\u7684\u6a21\u578b\uff1f \u6df1\u5165\u7406\u89e3\u5ea6\u64ec\u5408\u8207\u6b20\u64ec\u5408 Bias-Variance Tradeoff \u5982\u4f55\u907f\u514d\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408\uff1f \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 
\u5728\u6a5f\u5668\u5b78\u7fd2\u4e2d\u5927\u5bb6\u53ef\u80fd\u6700\u5e38\u898b\u7684\u554f\u984c\u662f\uff0c\u7576\u8a13\u7df4\u597d\u4e86\u6a21\u578b\u4e26\u5728\u6e2c\u8a66\u8cc7\u6599\u4e5f\u7372\u5f97\u4e0d\u932f\u7684\u6210\u7e3e\u3002\u65bc\u662f\u5f88\u958b\u5fc3\u7684\u843d\u5730\u4e26\u90e8\u7f72\u5230\u771f\u5be6\u5834\u57df\u4e2d\uff0c\u6b8a\u4e0d\u77e5\u9810\u6e2c\u51fa\u4f86\u7684\u7d50\u679c\u9060\u9060\u4e0d\u5982\u9810\u671f\u3002\u6211\u60f3\u9019\u500b\u75db\u9ede\u5927\u5bb6\u61c9\u8a72\u90fd\u7d93\u6b77\u904e\uff0c\u5c24\u5176\u662f\u6a5f\u5668\u5b78\u7fd2\u7684\u65b0\u624b\u3002\u9019\u7a2e\u60c5\u6cc1\u5c31\u662f\u6240\u8b02\u7684\u904e\u5ea6\u64ec\u5408\uff0c\u5b83\u662f\u4e00\u500b\u5728\u6a5f\u5668\u5b78\u7fd2\u9818\u57df\u4e2d\u975e\u5e38\u68d8\u624b\u7684\u554f\u984c\u3002\u7576\u4f60\u7684\u6a21\u578b\u904e\u5ea6\u7684\u64ec\u5408\u8a13\u7df4\u96c6\uff0c\u9019\u610f\u5473\u8457\u4f60\u7684\u6a21\u578b\u904e\u65bc\u8907\u96dc\u7684\u53bb\u8a18\u4f4f\u6240\u6709\u73fe\u6709\u7684\u6578\u64da\u9ede\uff0c\u9032\u800c\u5c0e\u81f4\u6a21\u578b\u7684\u6cdb\u5316\u80fd\u529b\u4e0d\u4f73\uff0c\u9019\u4e0d\u662f\u6211\u5011\u671f\u671b\u7684\u3002\u6240\u8b02\u7684\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u662f\u6307\uff0c\u7576\u6211\u5229\u7528\u8a13\u7df4\u96c6\u8a13\u7df4\u4e00\u500b\u6a21\u578b\u5f8c\u518d\u62ff\u53e6\u4e00\u7d44\u6a21\u578b\u6c92\u770b\u904e\u7684\u8cc7\u6599\u9032\u884c\u9810\u6e2c\uff0c\u6700\u7d42\u7684\u9810\u6e2c\u7d50\u679c\u5982\u679c\u5728\u6c92\u770b\u904e\u7684\u8cc7\u6599\u4e2d\u4f9d\u7136\u4fdd\u6301\u4e0d\u932f\u7684\u8868\u73fe\u6211\u5011\u5c31\u53ef\u4ee5\u8aaa\u6b64\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u5f37\u3002\u4eca\u5929\u6211\u5011\u5c07\u4f86\u8a73\u7d30\u63a2\u8a0e\u4f55\u8b02\u904e\u5ea6\u64ec\u5408\uff0c\u4ee5\u53ca\u8a72\u5982\u4f55\u53bb\u89e3\u6c7a\u5b83\u4f7f\u5f97\u6a21\u578b\u8655\u65bc\u4e00\u500b\u9069\u7576\u7684\u72c0\u614b\u3002 \u5982\u4f55\u9078\u64c7\u6700\u4f73\u7684\u6a21\u578b\uff1f 
\u901a\u5e38\u6211\u5011\u5e0c\u671b\u9810\u6e2c\u51fa\u4f86\u7684\u7d50\u679c\u8981\u8207\u5be6\u969b\u7684\u6578\u503c\u8d8a\u63a5\u8fd1\u8d8a\u597d\uff0c\u4e5f\u5c31\u662f\u5728\u6a21\u578b\u8a13\u7df4\u7684\u904e\u7a0b\u4e2d\u6211\u5011\u8981\u60f3\u8fa6\u6cd5\u6700\u5c0f\u5316\u8aa4\u5dee\u4f7f\u5f97\u6a21\u578b\u7684\u8aa4\u5dee\u8d8a\u5c0f\u8d8a\u597d\u3002\u90a3\u9ebc\u6211\u5011\u8a72\u5982\u4f55\u8a55\u4f30\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u597d\u58de\u5462\uff1f\u4ee5\u4e0b\u5716\u70ba\u4f8b\uff0c\u5047\u8a2d\u6211\u5011\u8981\u8a13\u7df4\u4e00\u500b\u4e8c\u5143\u5206\u985e\u5668\u3002\u6700\u7c21\u55ae\u7684\u65b9\u6cd5\u662f\u627e\u51fa\u4e00\u689d\u7dda\u5920\u5c07\u9019\u5169\u500b\u985e\u5225\u5b8c\u6574\u5730\u5206\u958b\uff0c\u7136\u800c\u9019\u4e00\u689d\u5207\u5272\u7684\u7dda\u8981\u9577\u5f97\u600e\u6a23\u624d\u662f\u597d\u7684\u6a21\u578b\u5462\uff1f\u5f9e\u4e0b\u5716\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7d05\u8272\u865b\u7dda\u7684\u6a21\u578b\u5b8c\u6574\u7684\u64ec\u5408\u65bc\u8a13\u7df4\u8cc7\u6599\uff0c\u800c\u7d05\u8272\u5be6\u7dda\u7684\u6a21\u578b\u76f8\u5c0d\u7684\u6bd4\u8f03\u6c92\u6709\u90a3\u9ebc\u56b4\u53b2\uff0c\u5728\u5169\u500b\u985e\u5225\u9593\u9069\u7576\u7684\u627e\u51fa\u4e00\u689d\u5e73\u6ed1\u7684\u66f2\u7dda\u4f86\u5340\u9694\u5169\u985e\u7684\u8cc7\u6599\u3002 
\u63a5\u8457\u6211\u5011\u62ff\u6e2c\u8a66\u8cc7\u6599\u9032\u884c\u6a21\u578b\u9810\u6e2c\uff0c\u53ef\u4ee5\u767c\u73fe\u7531\u65bc\u7d05\u8272\u865b\u7dda\u7684\u6a21\u578b\u5df2\u7d93\u5b8c\u6574\u8a18\u4f4f\u4e86\u8a13\u7df4\u96c6\u7684\u8da8\u52e2\uff0c\u56e0\u6b64\u5728\u65b0\u7684\u6c92\u770b\u904e\u7684\u8cc7\u6599\u8868\u73fe\u5c31\u6c92\u6709\u90a3\u9ebc\u597d\u4e86\u3002\u5c24\u5176\u662f\u5728\u5169\u985e\u5225\u5206\u9694\u7dda\u9644\u8fd1\u7684\u8cc7\u6599\u6700\u80fd\u770b\u51fa\u7aef\u502a\u3002\u65bc\u662f\u6211\u5011\u53ef\u4ee5\u5f88\u78ba\u5b9a\u7d05\u8272\u865b\u7dda\u7684\u6a21\u578b\u5df2\u7d93\u904e\u5ea6\u64ec\u5408\u8a13\u7df4\u8cc7\u6599\u4e86\u3002\u53e6\u5916\u7d05\u8272\u5be6\u7dda\u7684\u6a21\u578b\u96d6\u7136\u5728\u8a13\u7df4\u96c6\u4e2d\u6709\u5e7e\u7b46\u6703\u9810\u6e2c\u932f\u8aa4\uff0c\u4f46\u662f\u5b83\u5728\u6e2c\u8a66\u96c6\u8cc7\u6599\u4e2d\u4e00\u6a23\u4fdd\u6301\u7a69\u5b9a\u7684\u9810\u6e2c\u80fd\u529b\u3002 \u5f9e\u4e0a\u8ff0\u7684\u4f8b\u5b50\u6211\u5011\u53ef\u4ee5\u5f97\u77e5\uff0c\u5728\u8a13\u7df4\u6a21\u578b\u6642\u4e26\u975e\u8a13\u7df4\u96c6\u7684\u8aa4\u5dee\u8d8a\u5c0f\u8d8a\u597d\u3002\u6211\u5011\u5fc5\u9808\u540c\u6642\u62ff\u6e2c\u8a66\u96c6\u9a57\u8b49\u6a21\u578b\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u76ee\u6a19\u662f\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u5e73\u5747\u8aa4\u5dee\u8981\u8d8a\u8fd1\u8d8a\u597d\u3002 \u4e00\u500b\u9069\u7576\u7684\u6a5f\u5668\u5b78\u7fd2\u5de5\u4f5c\u6d41\u7a0b\u5305\u62ec\uff1a \u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6 \u8cc7\u6599\u8996\u89ba\u5316\u8207\u524d\u8655\u7406 \u5c0b\u627e\u9069\u5408\u7684\u6a21\u578b \u8abf\u6574\u6a21\u578b\u8d85\u53c3\u6578 \u4f7f\u7528\u9069\u7576\u7684\u6307\u6a19\u8a55\u4f30\u6a21\u578b \u4ea4\u53c9\u9a57\u8b49\u6a21\u578b Overfitting vs. 
Underfitting \u904e\u5ea6\u64ec\u5408\u7684\u53cd\u7fa9\u5c31\u662f\u6b20\u64ec\u5408\uff0c\u5f9e\u5b57\u9762\u4e0a\u53ef\u4ee5\u5f97\u77e5\u6a21\u578b\u9810\u6e2c\u80fd\u529b\u662f\u4e0d\u597d\u7684\u3002\u7576\u6a21\u578b\u592a\u7c21\u55ae\u6642\u6703\u767c\u751f\u6b20\u64ec\u5408\uff0c\u6216\u662f\u52a0\u5165\u592a\u591a\u7684 L1/L2 \u6b63\u5247\u5316\u9650\u5236\u6a21\u578b\u9810\u6e2c\u80fd\u529b\uff0c\u4f7f\u6a21\u578b\u5728\u5f9e\u6578\u64da\u96c6\u4e2d\u5b78\u7fd2\u6642\u8b8a\u5f97\u4e0d\u9748\u6d3b\u3002\u4e00\u500b\u904e\u65bc\u7c21\u55ae\u7684\u6a21\u578b\u5728\u9810\u6e2c\u4e2d\u5f80\u5f80\u5177\u6709\u8f03\u5c0f\u7684\u65b9\u5dee(variance)\u800c\u5c0e\u81f4\u504f\u5dee(bias)\u5c31\u6703\u8b8a\u5927\u3002\u76f8\u53cd\u7684\u904e\u65bc\u8907\u96dc\u7684\u6a21\u578b\u6703\u6709\u8f03\u5927\u7684\u8b8a\u7570\u9032\u800c\u5c0e\u81f4\u65b9\u5dee\u5927\uff0c\u540c\u6642\u504f\u5dee\u6703\u8b8a\u5c0f\u3002\u504f\u5dee\u548c\u65b9\u5dee\u90fd\u662f\u6a5f\u5668\u5b78\u7fd2\u4e2d\u7684\u9810\u6e2c\u8aa4\u5dee\u7684\u65b9\u5f0f\u3002\u5728\u4e00\u822c\u60c5\u6cc1\u4e0b\u6211\u5011\u53ef\u4ee5\u6e1b\u5c11\u504f\u5dee\u6240\u5f15\u8d77\u7684\u8aa4\u5dee\uff0c\u4f46\u53ef\u80fd\u6703\u5c0e\u81f4\u589e\u52a0\u65b9\u5dee\u5f15\u8d77\u7684\u8aa4\u5dee\uff0c\u53cd\u4e4b\u4ea6\u7136\u3002 \u9019\u88e1\u6211\u5011\u5c31\u8981\u4f86\u601d\u8003\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u4e2d\u7684 error \u5f9e\u4f55\u800c\u4f86\uff1f\u6a21\u578b\u4e2d\u7684 error \u662f\u5224\u65b7\u4e00\u500b\u6a21\u578b\u7684\u597d\u58de\u4f9d\u64da\uff0c\u4f46\u5176\u5be6\u6211\u5011\u53ef\u4ee5\u5c07 error \u62c6\u5206\u6210\u5169\u5927\u90e8\u5206\u3002\u5206\u5225\u6709 Bias \u8207 Variance \u5169\u500b\u90e8\u5206\u3002\u4ee5\u5be6\u969b\u4f8b\u5b50\u4f86\u8aaa\uff0c\u5047\u8a2d\u8f38\u51fa y \u662f\u8f38\u5165 x \u771f\u6b63\u7684\u7b54\u6848\uff0c\u800c \u0177 \u5247\u662f\u900f\u904e\u6a21\u578b f(x) 
\u8a13\u7df4\u51fa\u4f86\u7684\u9810\u6e2c\u503c\uff0c\u6211\u5011\u5e0c\u671b\u9810\u6e2c\u7684\u7d50\u679c\u8981\u8207\u771f\u5be6\u7b54\u6848\u8d8a\u63a5\u8fd1\u8d8a\u597d\uff0c\u7576 \u0177\u2260y \u6642\u5c31\u6703\u7522\u751f error (\u8aa4\u5dee)\u3002 Bias-Variance Tradeoff \u65b9\u5dee\u8207\u504f\u5dee\u4e4b\u9593\u5b58\u5728\u8457\u4e00\u4e9b\u95dc\u4fc2\uff0c\u6211\u5011\u5fc5\u9808\u5f9e\u4e2d\u627e\u5230\u4e00\u500b\u9069\u7576\u7684\u5e73\u8861\u9ede\u3002\u56e0\u6b64\u6211\u5011\u5e0c\u671b\u900f\u904e\u6b0a\u8861 bias error \u8ddf variance error \u4f86\u4f7f\u5f97\u7e3d\u8aa4\u5dee\u9054\u5230\u6700\u5c0f\u3002\u6211\u5011\u5e38\u6703\u4ee5\u6253\u9776\u4f8b\u5b50\u89e3\u91cb\u65b9\u5dee\u8207\u504f\u5dee\u4e4b\u9593\u7684\u95dc\u806f\u6027\u3002\u5047\u8a2d\u6211\u5011\u767c\u5c04\u5341\u6b21\uff0c\u6211\u5011\u8aac\u4e00\u500b\u4eba\u7684\u6253\u9776\u6280\u8853\u5f88\u7cbe\u6e96\u3002\u5176\u4e2d\u7684 \u7cbe \u5c31\u8868\u793a\u9019\u5341\u500b\u9776\u9762\u4e0a\u7684\u9ede\u5f7c\u6b64\u9593\u8ddd\u96e2\u90fd\u76f8\u7576\u8fd1\uff0c\u4e5f\u5c31\u662f\u6211\u7684\u65b9\u5dee\u975e\u5e38\u4f4e(low variance)\u3002\u53e6\u5916\u6240\u8b02\u7684 \u6e96 \u5c31\u8868\u793a\u9019\u5341\u500b\u9ede\u90fd\u96e2\u6e96\u5fc3\u5f88\u8fd1\uff0c\u4e5f\u5c31\u662f\u6211\u5011\u7684\u504f\u5dee\u975e\u5e38\u4f4e(low bias)\u3002 Underfitting: \u904e\u65bc\u7c21\u55ae\u7684\u6a21\u578b\u4f7f\u5f97\u9810\u6e2c\u7d50\u679c\u5f48\u6027\u4e0d\u9ad8\uff0c\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u8868\u73fe\u90fd\u4e0d\u597d\u3002low variance (high bias)\u3002 Overfitting: \u904e\u65bc\u8907\u96dc\u7684\u6a21\u578b\u4f7f\u5f97\u8a13\u7df4\u96c6\u5b8c\u6574\u7684\u88ab\u64ec\u5408\uff0c\u56e0\u6b64\u8a13\u7df4\u96c6\u8868\u73fe\u6975\u597d\uff0c\u4f46\u6e2c\u8a66\u96c6\u8868\u73fe\u4e0d\u4f73\u3002high variance (low bias)\u3002 Error from Bias 
\u504f\u5dee(bias)\u5c31\u662f\u6a21\u578b\u7684\u9810\u6e2c\u8207\u771f\u5be6\u503c\u4e4b\u9593\u7684\u5dee\u7570\u3002\u4e00\u822c\u6211\u5011\u8a13\u7df4\u6a21\u578b\u662f\u671f\u671b\u9810\u6e2c\u7684\u503c\u8981\u8207\u5be6\u969b\u7684\u7b54\u6848\u8981\u8d8a\u63a5\u8fd1\u8d8a\u597d\u3002\u7136\u800c\u7576\u4e00\u500b\u7c21\u55ae\u7684\u7dda\u6027\u6a21\u578b\u53ef\u80fd\u7121\u6cd5\u5b8c\u6574\u5730\u64ec\u5408\u5230\u4e00\u500b\u8907\u96dc\u975e\u7dda\u6027\u7684\u8cc7\u6599\u96c6\u3002\u56e0\u6b64\u5982\u4e0b\u5716\u6240\u793a\uff0c\u7576\u4e00\u500b\u6a21\u578b\u8a13\u7df4\u7d50\u679c\u504f\u5dee\u904e\u5927\u6211\u5011\u53ef\u4ee5\u5f97\u77e5\u8a72\u6a21\u578b\u904e\u65bc\u7c21\u55ae\u3002\u7121\u8ad6\u641c\u96c6\u518d\u591a\u7684\u8cc7\u6599\uff0c\u7dda\u6027\u7684\u6a21\u578b\u6c38\u9060\u7121\u6cd5\u64ec\u5408\u975e\u7dda\u6027\u7684\u66f2\u7dda\u3002\u56e0\u70ba\u6bd4\u8f03\u7c21\u55ae\u7684\u6a21\u578b\uff0c\u4ed6\u53d7\u5230\u4e0d\u540c\u7684\u8cc7\u6599\u7684\u5f71\u97ff\u662f\u6bd4\u8f03\u5c0f\u7684\u3002 \u7c21\u55ae\u7684\u6a21\u578b\u6709\u5927\u7684 bias\uff0c\u5c0f\u7684 variance\u3002 Error \u4f86\u81ea\u65bc bias \u5f88\u5927\uff0c\u7a31\u70ba\u6b20\u64ec\u5408\u3002 Error from Variance 
\u65b9\u5dee(variance)\u662f\u6307\u4f60\u7684\u6a21\u578b\u5c0d\u65bc\u8cc7\u6599\u96c6\u7684\u654f\u611f\u7a0b\u5ea6\u3002\u4e00\u500b\u904e\u65bc\u8907\u96dc\u7684\u6a21\u578b\u6703\u5c0e\u81f4\u8f38\u51fa\u7684\u8b8a\u7570\u6027\u975e\u5e38\u5927\u3002\u6a21\u578b\u6b7b\u80cc\u6240\u6709\u8a13\u7df4\u96c6\u4e2d\u7684\u6578\u64da\u9ede\u6703\u5c0e\u81f4\u4e00\u500b\u554f\u984c\u767c\u751f\u3002\u7576\u4f60\u7684\u8a13\u7df4\u8cc7\u6599\u6709\u8a31\u591a\u7684\u96a8\u6a5f\u8aa4\u5dee\u6216\u662f\u96e2\u7fa4\u503c\u6642\uff0c\u6211\u5011\u53c8\u628a\u9019\u4e9b\u7570\u5e38\u503c\u5168\u90e8\u64ec\u5408\u9032\u6a21\u578b\u88e1\u9762\uff0c\u5c0e\u81f4\u5b78\u51fa\u4f86\u7684\u6a21\u578b\u904e\u65bc\u8907\u96dc\u540c\u6642\u964d\u4f4e\u6cdb\u5316\u80fd\u529b\uff0c\u5c0d\u65bc\u672a\u77e5\u7684\u8cc7\u6599\u9810\u6e2c\u7684\u80fd\u529b\u5c31\u6703\u5f88\u5dee\uff0c\u540c\u6642\u9020\u5c31\u4e86\u5f88\u9ad8\u7684 variance error\u3002\u56e0\u6b64\u9019\u6a23\u7684\u7d50\u679c\u6211\u5011\u7a31\u70ba\u904e\u5ea6\u64ec\u5408\u3002 \u8f03\u8907\u96dc\u7684\u6a21\u578b\u6709\u5c0f\u7684 bias\uff0c\u5927\u7684 variance\u3002 Error \u4f86\u81ea\u65bc variance \u5f88\u5927\uff0c\u7a31\u70ba\u904e\u5ea6\u64ec\u5408\u3002 \u5982\u4f55\u907f\u514d\u6b20\u64ec\u5408\uff1f \u901a\u5e38 bias \u5927\u800c\u5c0e\u81f4\u6a21\u578b\u904e\u65bc\u7c21\u55ae\uff0c\u800c\u7121\u6cd5\u64ec\u5408\u8a13\u7df4\u8cc7\u6599\u3002\u6211\u5011\u53ef\u4ee5\u8a66\u8457\u589e\u52a0\u8f38\u5165\u7684\u7279\u5fb5\uff0c\u4e26\u505a\u4e00\u4e9b\u7279\u5fb5\u5de5\u7a0b\u8b93\u6a21\u578b\u89c0\u5bdf\u591a\u9ede\u7dda\u7d22\u3002\u6216\u662f\u8abf\u6574\u6a21\u578b\u7684\u6f14\u7b97\u6cd5\uff0c\u4f7f\u6a21\u578b\u66f4\u8907\u96dc\u3002\u4f8b\u5982\u4f7f\u7528\u9805\u6b21\u66f4\u9ad8\u7684\u591a\u9805\u5f0f\u6a21\u578b\uff0c\u6216\u662f tree-based 
\u6a21\u578b\u4e2d\u9069\u7576\u7684\u589e\u52a0\u6a39\u7684\u6df1\u5ea6......\u7b49\u3002\u9019\u88cf\u66f4\u503c\u5f97\u4e00\u63d0\u7684\u662f\uff0c\u7576\u6a21\u578b\u6b20\u64ec\u5408\u6642\u641c\u96c6\u518d\u591a\u7684\u8a13\u7df4\u8cc7\u6599\u662f\u6c92\u6709\u7528\u7684\u3002\u56e0\u70ba\u7c21\u55ae\u7684\u6a21\u578b\u6bd4\u8f03\u4e0d\u6703\u53d7\u8cc7\u6599\u7684\u5f71\u97ff\uff0c\u6240\u4ee5 variance \u76f8\u5c0d\u7684\u6703\u6bd4\u8f03\u4f4e\u800c bias \u5927\uff0c\u4e5f\u5c31\u662f\u8f38\u51fa\u7684\u8b8a\u5316\u6027\u4e0d\u5927\u3002\u5f9e\u9019\u88e1\u6211\u5011\u53ef\u4ee5\u5f97\u77e5\u7c21\u55ae\u7684\u6a21\u578b\u53d7\u5230\u4e0d\u540c\u7684\u8f38\u5165\u8cc7\u6599\u53d7\u5230\u7684\u5f71\u97ff\u662f\u6bd4\u8f03\u5c0f\u7684\u3002\u56e0\u70ba\u6a21\u578b\u9078\u5f97\u4e0d\u597d\uff0c\u518d\u600e\u9ebc\u8a13\u7df4\u4ed6\u7684 bias \u9084\u662f\u4e00\u6a23\u5927\u3002 \u589e\u52a0\u8f38\u5165\u7279\u5fb5\u6216\u7279\u5fb5\u5de5\u7a0b \u63d0\u9ad8\u6a21\u578b\u8907\u96dc\u5ea6 \u5982\u4f55\u907f\u514d\u904e\u5ea6\u64ec\u5408\uff1f \u7576\u6a21\u578b\u904e\u65bc\u8907\u96dc\u904e\u5ea6\u64ec\u5408\u767c\u751f\u7684\u6a5f\u7387\u76f8\u5c0d\u63d0\u9ad8\uff0c\u6211\u5011\u53ef\u4ee5\u5f9e\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u89c0\u5bdf\uff0c\u5f88\u5bb9\u6613\u5730\u6aa2\u6e2c\u6a21\u578b\u662f\u5426\u904e\u5ea6\u64ec\u5408\u3002\u4f46\u662f\u6211\u5011\u61c9\u8a72\u5982\u4f55\u907f\u514d\u6a21\u578b\u592a\u904e\u65bc\u8907\u96dc\uff0c\u800c\u5c0e\u81f4\u904e\u5ea6\u64ec\u5408\u767c\u751f\u5462\uff1f\u901a\u5e38\u6211\u5011\u6703\u8a3a\u65b7\u9019\u4e9b\u932f\u8aa4\u7684\u4f86\u6e90\uff0c\u9019\u4e9b\u932f\u8aa4\u4f86\u81ea\u65bc\u5169\u7a2e\uff0c\u5206\u5225\u70ba\u6709 bias \u8207 
variance\u3002\u5982\u679c\u6211\u5011\u80fd\u5920\u8a3a\u65b7\u51fa\u9019\u4e9b\u932f\u8aa4\u7684\u4f86\u6e90\uff0c\u6211\u5011\u5c31\u80fd\u6311\u51fa\u9069\u7576\u7684\u65b9\u6cd5\u4f86\u6539\u5584\u6a21\u578b\u3002\u4ee5\u4e0b\u5e7e\u9ede\u6216\u8a31\u80fd\u5920\u5e6b\u52a9\u4f60\u9032\u884c\u5efa\u6a21\uff1a \u641c\u96c6\u66f4\u591a\u8a13\u7df4\u8cc7\u6599 \u589e\u52a0\u8a13\u7df4\u96c6\u7684\u8cc7\u6599\u91cf\u662f\u6709\u6548\u63a7\u5236 variance \u7684\u65b9\u6cd5\uff0c\u4e26\u4e14\u4e0d\u6703\u589e\u52a0 bias\u3002 \u6a21\u578b\u6dfb\u52a0 Regularization \u5728\u640d\u5931\u51fd\u6578\u4e2d\u589e\u52a0\u4e00\u4e9b\u9650\u5236\u5f0f\uff0c\u964d\u4f4e\u6a21\u578b\u8907\u96dc\u5ea6\u3002 \u4ea4\u53c9\u9a57\u8b49 \u5f9e\u8a13\u7df4\u96c6\u4e2d\u5207\u51fa\u9a57\u8b49\u96c6\uff0c\u4e26\u6311\u51fa\u597d\u7684\u6a21\u578b\u3002\u800c\u4e0d\u662f\u5f9e\u6e2c\u8a66\u96c6\u4e2d\u6c42\u6700\u5c0f error\u3002 Early Stopping \u8a2d\u5b9a\u7576\u6a21\u578b\u9023\u7e8c\u5e7e\u4ee3\u90fd\u7121\u6cd5\u6539\u5584 error\uff0c\u5c31\u7acb\u5373\u7d42\u6b62\u8a13\u7df4\u3002 Ensembling \u900f\u904e\u8a13\u7df4\u591a\u500b\u6a21\u578b\uff0c\u4e26\u53d6\u5f97\u6bcf\u500b\u6a21\u578b\u9810\u6e2c\u4e26\u5e73\u5747\u4f5c\u70ba\u6700\u7d42\u8f38\u51fa\u3002 Reference Overfitting in Machine Learning: What It Is and How to Prevent It WTF is the Bias-Variance Tradeoff? 
(Infographic) \u3010\u6a5f\u5668\u5b78\u7fd2\u3011\u504f\u5dee\u8207\u65b9\u5dee\u4e4b\u6b0a\u8861 Bias-Variance Tradeoff \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 24] \u6a5f\u5668\u5b78\u7fd2 - \u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#day-24-","text":"","title":"[Day 24] \u6a5f\u5668\u5b78\u7fd2 - \u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#_1","text":"\u5982\u4f55\u9078\u64c7\u6700\u4f73\u7684\u6a21\u578b\uff1f \u6df1\u5165\u7406\u89e3\u5ea6\u64ec\u5408\u8207\u6b20\u64ec\u5408 Bias-Variance Tradeoff \u5982\u4f55\u907f\u514d\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408\uff1f \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#_2","text":"\u5728\u6a5f\u5668\u5b78\u7fd2\u4e2d\u5927\u5bb6\u53ef\u80fd\u6700\u5e38\u898b\u7684\u554f\u984c\u662f\uff0c\u7576\u8a13\u7df4\u597d\u4e86\u6a21\u578b\u4e26\u5728\u6e2c\u8a66\u8cc7\u6599\u4e5f\u7372\u5f97\u4e0d\u932f\u7684\u6210\u7e3e\u3002\u65bc\u662f\u5f88\u958b\u5fc3\u7684\u843d\u5730\u4e26\u90e8\u7f72\u5230\u771f\u5be6\u5834\u57df\u4e2d\uff0c\u6b8a\u4e0d\u77e5\u9810\u6e2c\u51fa\u4f86\u7684\u7d50\u679c\u9060\u9060\u4e0d\u5982\u9810\u671f\u3002\u6211\u60f3\u9019\u500b\u75db\u9ede\u5927\u5bb6\u61c9\u8a72\u90fd\u7d93\u6b77\u904e\uff0c\u5c24\u5176\u662f\u6a5f\u5668\u5b78\u7fd2\u7684\u65b0\u624b\u3002\u9019\u7a2e\u60c5\u6cc1\u5c31\u662f\u6240\u8b02\u7684\u904e\u5ea6\u64ec\u5408\uff0c\u5b83\u662f\u4e00\u500b\u5728\u6a5f\u5668\u5b78\u7fd2\u9818\u57df\u4e2d\u975e\u5e38\u68d8\u624b\u7684\u7684\u554f\u984c\u3002\u7576\u4f60\u7684\u6a21\u578b\u904e\u5ea6\u7684\u64ec\u5408\u8a13\u7
df4\u96c6\uff0c\u9019\u610f\u5473\u8457\u4f60\u7684\u6a21\u578b\u904e\u65bc\u8907\u96dc\u7684\u53bb\u8a18\u4f4f\u6240\u6709\u73fe\u6709\u7684\u6578\u64da\u9ede\uff0c\u9032\u800c\u5c0e\u81f4\u6a21\u578b\u7684\u6cdb\u5316\u80fd\u529b\u4e0d\u4f73\uff0c\u9019\u4e0d\u662f\u6211\u5011\u671f\u671b\u7684\u3002\u6240\u8b02\u7684\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u662f\u6307\uff0c\u7576\u6211\u5229\u7528\u8a13\u7df4\u96c6\u8a13\u7df4\u4e00\u500b\u6a21\u578b\u5f8c\u518d\u62ff\u53e6\u4e00\u7d44\u6a21\u578b\u6c92\u770b\u904e\u7684\u8cc7\u6599\u9032\u884c\u9810\u6e2c\uff0c\u6700\u7d42\u7684\u9810\u6e2c\u7d50\u679c\u5982\u679c\u5728\u6c92\u770b\u904e\u7684\u8cc7\u6599\u4e2d\u4f9d\u7136\u4fdd\u6301\u4e0d\u932f\u7684\u8868\u73fe\u6211\u5011\u5c31\u53ef\u4ee5\u8aaa\u6b64\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u5f37\u3002\u4eca\u5929\u6211\u5011\u5c07\u4f86\u8a73\u7d30\u63a2\u8a0e\u4f55\u8b02\u904e\u5ea6\u64ec\u5408\uff0c\u4ee5\u53ca\u8a72\u5982\u4f55\u53bb\u89e3\u6c7a\u5b83\u4f7f\u5f97\u6a21\u578b\u8655\u65bc\u4e00\u500b\u9069\u7576\u7684\u72c0\u614b\u3002","title":"\u524d\u8a00"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#_3","text":"\u901a\u5e38\u6211\u5011\u5e0c\u671b\u9810\u6e2c\u51fa\u4f86\u7684\u7d50\u679c\u8981\u8207\u5be6\u969b\u7684\u6578\u503c\u8d8a\u63a5\u8fd1\u8d8a\u597d\uff0c\u4e5f\u5c31\u662f\u5728\u6a21\u578b\u8a13\u7df4\u7684\u904e\u7a0b\u4e2d\u6211\u5011\u8981\u60f3\u8fa6\u6cd5\u6700\u5c0f\u5316\u8aa4\u5dee\u4f7f\u5f97\u6a21\u578b\u7684\u8aa4\u5dee\u8d8a\u5c0f\u8d8a\u597d\u3002\u90a3\u9ebc\u6211\u5011\u8a72\u5982\u4f55\u8a55\u4f30\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u597d\u58de\u5462\uff1f\u4ee5\u4e0b\u5716\u70ba\u4f8b\uff0c\u5047\u8a2d\u6211\u5011\u8981\u8a13\u7df4\u4e00\u500b\u4e8c\u5143\u5206\u985e\u5668\u3002\u6700\u7c21\u55ae\u7684\u65b9\u6cd5\u662f\u627e\u51fa\u4e00\u689d\u7dda\u5920\u5c07\u9019\u5169\u500b\u985e\u5225\u5b8c\u6574\u5730\u5206\u958b\uff0c\u7136\u800c\u9019\u4e00\u689d\u5207\u5272\u7684\u7dda\u8
981\u9577\u5f97\u600e\u6a23\u624d\u662f\u597d\u7684\u6a21\u578b\u5462\uff1f\u5f9e\u4e0b\u5716\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7d05\u8272\u865b\u7dda\u7684\u6a21\u578b\u5b8c\u6574\u7684\u64ec\u5408\u65bc\u8a13\u7df4\u8cc7\u6599\uff0c\u800c\u7d05\u8272\u5be6\u7dda\u7684\u6a21\u578b\u76f8\u5c0d\u7684\u6bd4\u8f03\u6c92\u6709\u90a3\u9ebc\u56b4\u53b2\uff0c\u5728\u5169\u500b\u985e\u5225\u9593\u9069\u7576\u7684\u627e\u51fa\u4e00\u689d\u5e73\u6ed1\u7684\u66f2\u7dda\u4f86\u5340\u9694\u5169\u985e\u7684\u8cc7\u6599\u3002 \u63a5\u8457\u6211\u5011\u62ff\u6e2c\u8a66\u8cc7\u6599\u9032\u884c\u6a21\u578b\u9810\u6e2c\uff0c\u53ef\u4ee5\u767c\u73fe\u7531\u65bc\u7d05\u8272\u865b\u7dda\u7684\u6a21\u578b\u5df2\u7d93\u5b8c\u6574\u8a18\u4f4f\u4e86\u8a13\u7df4\u96c6\u7684\u8da8\u52e2\uff0c\u56e0\u6b64\u5728\u65b0\u7684\u6c92\u770b\u904e\u7684\u8cc7\u6599\u8868\u73fe\u5c31\u6c92\u6709\u90a3\u9ebc\u597d\u4e86\u3002\u5c24\u5176\u662f\u5728\u5169\u985e\u5225\u5206\u9694\u7dda\u9644\u8fd1\u7684\u8cc7\u6599\u6700\u80fd\u770b\u51fa\u7aef\u502a\u3002\u65bc\u662f\u6211\u5011\u53ef\u4ee5\u5f88\u78ba\u5b9a\u7d05\u8272\u865b\u7dda\u7684\u6a21\u578b\u5df2\u7d93\u904e\u5ea6\u64ec\u5408\u8a13\u7df4\u8cc7\u6599\u4e86\u3002\u53e6\u5916\u7d05\u8272\u5be6\u73fe\u7684\u6a21\u578b\u96d6\u7136\u5728\u8a13\u7df4\u96c6\u4e2d\u6709\u5e7e\u7b46\u6703\u9810\u6e2c\u932f\u8aa4\uff0c\u4f46\u662f\u5b83\u518d\u6e2c\u8a66\u96c6\u8cc7\u6599\u4e2d\u4e00\u6a23\u4fdd\u6301\u7a69\u5b9a\u7684\u9810\u6e2c\u80fd\u529b\u3002 \u5f9e\u4e0a\u8ff0\u7684\u4f8b\u5b50\u6211\u5011\u53ef\u4ee5\u5f97\u77e5\uff0c\u5728\u8a13\u7df4\u6a21\u578b\u6642\u4e26\u975e\u8a13\u7df4\u96c6\u7684\u8aa4\u5dee\u8d8a\u5c0f\u8d8a\u597d\u3002\u6211\u5011\u5fc5\u9808\u540c\u6642\u62ff\u6e2c\u8a66\u96c6\u9a57\u8b49\u6a21\u578b\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u76ee\u6a19\u662f\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u5e73\u5747\u8aa4\u5dee\u8981\u8d8a\u8fd1\u8d8a\u597d\u3002 
\u4e00\u500b\u9069\u7576\u7684\u6a5f\u5668\u5b78\u7fd2\u5de5\u4f5c\u6d41\u7a0b\u5305\u62ec\uff1a \u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6 \u8cc7\u6599\u8996\u89ba\u5316\u8207\u524d\u8655\u7406 \u5c0b\u627e\u9069\u5408\u7684\u6a21\u578b \u8abf\u6574\u6a21\u578b\u8d85\u53c3\u6578 \u4f7f\u7528\u9069\u7576\u7684\u6307\u6a19\u8a55\u4f30\u6a21\u578b \u4ea4\u53c9\u9a57\u8b49\u6a21\u578b","title":"\u5982\u4f55\u9078\u64c7\u6700\u4f73\u7684\u6a21\u578b\uff1f"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#overfitting-vs-underfitting","text":"\u904e\u5ea6\u64ec\u5408\u7684\u53cd\u7fa9\u5c31\u662f\u6b20\u64ec\u5408\uff0c\u5f9e\u5b57\u9762\u4e0a\u53ef\u4ee5\u5f97\u77e5\u6a21\u578b\u9810\u6e2c\u80fd\u529b\u662f\u4e0d\u597d\u7684\u3002\u7576\u6a21\u578b\u592a\u7c21\u55ae\u6642\u6703\u767c\u751f\u6b20\u64ec\u5408\uff0c\u6216\u662f\u52a0\u5165\u592a\u591a\u7684 L1/L2 \u6b63\u5247\u5316\u9650\u5236\u6a21\u578b\u9810\u6e2c\u80fd\u529b\uff0c\u4f7f\u6a21\u578b\u5728\u5f9e\u6578\u64da\u96c6\u4e2d\u5b78\u7fd2\u6642\u8b8a\u5f97\u4e0d\u9748\u6d3b\u3002\u4e00\u500b\u904e\u65bc\u7c21\u55ae\u7684\u6a21\u578b\u5728\u9810\u6e2c\u4e2d\u5f80\u5f80\u5177\u6709\u8f03\u5c0f\u7684\u65b9\u5dee(variance)\u800c\u5c0e\u81f4\u504f\u5dee(bias)\u5c31\u6703\u8b8a\u5927\u3002\u76f8\u53cd\u7684\u904e\u65bc\u8907\u96dc\u7684\u6a21\u578b\u6703\u6709\u8f03\u7684\u8b8a\u7570\u9032\u800c\u5c0e\u81f4\u65b9\u5dee\u5927\uff0c\u540c\u6642\u504f\u5dee\u6703\u8b8a\u5c0f\u3002\u504f\u5dee\u548c\u65b9\u5dee\u90fd\u662f\u6a5f\u5668\u5b78\u7fd2\u4e2d\u7684\u9810\u6e2c\u8aa4\u5dee\u7684\u65b9\u5f0f\u3002\u5728\u4e00\u822c\u60c5\u6cc1\u4e0b\u6211\u5011\u53ef\u4ee5\u6e1b\u5c11\u504f\u5dee\u6240\u5f15\u8d77\u7684\u8aa4\u5dee\uff0c\u4f46\u53ef\u80fd\u6703\u5c0e\u81f4\u589e\u52a0\u65b9\u5dee\u5f15\u8d77\u7684\u8aa4\u5dee\uff0c\u53cd\u4e4b\u4ea6\u7136\u3002 \u9019\u88e1\u6211\u5011\u5c31\u8981\u4f86\u601d\u8003\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u4e2d\u7684 error 
\u5f9e\u4f55\u800c\u4f86\uff1f\u6a21\u578b\u4e2d\u7684 error \u662f\u5224\u65b7\u4e00\u500b\u6a21\u578b\u7684\u597d\u58de\u4f9d\u64da\uff0c\u4f46\u5176\u5be6\u6211\u5011\u53ef\u4ee5\u5c07 error \u62c6\u5206\u6210\u5169\u5927\u90e8\u5206\u3002\u5206\u5225\u6709 Bias \u8207 Variance \u5169\u500b\u90e8\u5206\u3002\u4ee5\u5be6\u969b\u4f8b\u5b50\u4f86\u8aaa\uff0c\u5047\u8a2d\u8f38\u51fa y \u662f\u8f38\u5165 x \u771f\u6b63\u7684\u7b54\u6848\uff0c\u800c \u0177 \u5247\u662f\u900f\u904e\u6a21\u578b f(x) \u8a13\u7df4\u51fa\u4f86\u7684\u9810\u6e2c\u503c\uff0c\u6211\u5011\u5e0c\u671b\u9810\u6e2c\u7684\u7d50\u679c\u8981\u8207\u771f\u5be6\u7b54\u6848\u8d8a\u63a5\u8fd1\u8d8a\u597d\uff0c\u7576 \u0177\u2260y \u6642\u5c31\u6703\u7522\u751f error (\u8aa4\u5dee)\u3002","title":"Overfitting vs. Underfitting"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#bias-variance-tradeoff","text":"\u65b9\u5dee\u8207\u504f\u5dee\u4e4b\u9593\u5b58\u5728\u8457\u4e00\u4e9b\u95dc\u4fc2\uff0c\u6211\u5011\u5fc5\u9808\u5f9e\u4e2d\u627e\u5230\u4e00\u500b\u9069\u7576\u7684\u5e73\u8861\u9ede\u3002\u56e0\u6b64\u6211\u5011\u5e0c\u671b\u900f\u904e\u6b0a\u8861 bias error \u8ddf variance error \u4f86\u4f7f\u5f97\u7e3d\u8aa4\u5dee\u9054\u5230\u6700\u5c0f\u3002\u6211\u5011\u5e38\u6703\u4ee5\u6253\u9776\u4f8b\u5b50\u89e3\u91cb\u65b9\u5dee\u8207\u504f\u5dee\u4e4b\u9593\u7684\u95dc\u806f\u6027\u3002\u5047\u8a2d\u6211\u5011\u767c\u5c04\u5341\u6b21\uff0c\u6211\u5011\u8aac\u4e00\u500b\u4eba\u7684\u6253\u9776\u6280\u8853\u5f88\u7cbe\u6e96\u3002\u5176\u4e2d\u7684 \u7cbe \u5c31\u8868\u793a\u9019\u5341\u500b\u628a\u9762\u4e0a\u7684\u9ede\u5f7c\u6b64\u9593\u8ddd\u96e2\u90fd\u76f8\u7576\u8fd1\uff0c\u4e5f\u5c31\u662f\u6211\u7684\u65b9\u5dee\u975e\u5e38\u4f4e(low variance)\u3002\u53e6\u5916\u6240\u8b02\u7684 \u6e96 \u5c31\u8868\u793a\u9019\u5341\u500b\u9ede\u90fd\u96e2\u6e96\u5fc3\u5f88\u8fd1\uff0c\u4e5f\u5c31\u662f\u6211\u5011\u7684\u504f\u5dee\u975e\u5e38\u4f4e(low bias)\u3002 
Underfitting: \u904e\u65bc\u7c21\u55ae\u7684\u6a21\u578b\u4f7f\u5f97\u9810\u6e2c\u7d50\u679c\u5f48\u6027\u4e0d\u9ad8\uff0c\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u8868\u73fe\u90fd\u4e0d\u597d\u3002low variance (high bias)\u3002 Overfitting: \u904e\u65bc\u8907\u96dc\u7684\u6a21\u578b\u4f7f\u5f97\u8a13\u7df4\u96c6\u5b8c\u6574\u7684\u88ab\u64ec\u5408\uff0c\u56e0\u6b64\u8a13\u7df4\u96c6\u8868\u73fe\u6975\u597d\uff0c\u4f46\u6e2c\u8a66\u96c6\u8868\u73fe\u4e0d\u4f73\u3002high variance (low bias)\u3002","title":"Bias-Variance Tradeoff"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#error-from-bias","text":"\u504f\u5dee(bias)\u5c31\u662f\u6a21\u578b\u7684\u9810\u6e2c\u8207\u771f\u5be6\u503c\u4e4b\u9593\u7684\u5dee\u7570\u3002\u4e00\u822c\u6211\u5011\u8a13\u7df4\u6a21\u578b\u662f\u671f\u671b\u9810\u6e2c\u7684\u503c\u8981\u8207\u5be6\u969b\u7684\u7b54\u6848\u8981\u8d8a\u63a5\u8fd1\u8d8a\u597d\u3002\u7136\u800c\u7576\u4e00\u500b\u7c21\u55ae\u7684\u7dda\u6027\u6a21\u578b\u53ef\u80fd\u7121\u6cd5\u5b8c\u6574\u5730\u64ec\u5408\u5230\u4e00\u500b\u8907\u96dc\u975e\u7dda\u6027\u7684\u8cc7\u6599\u96c6\u3002\u56e0\u6b64\u5982\u4e0b\u5716\u6240\u793a\uff0c\u7576\u4e00\u500b\u6a21\u578b\u8a13\u7df4\u7d50\u679c\u504f\u5dee\u904e\u5927\u6211\u5011\u53ef\u4ee5\u5f97\u77e5\u8a72\u6a21\u578b\u904e\u65bc\u7c21\u55ae\u3002\u7121\u8ad6\u641c\u96c6\u518d\u591a\u7684\u8cc7\u6599\uff0c\u7dda\u6027\u7684\u6a21\u578b\u6c38\u9060\u7121\u6cd5\u64ec\u5408\u975e\u7dda\u6027\u7684\u66f2\u7dda\u3002\u56e0\u70ba\u6bd4\u8f03\u7c21\u55ae\u7684\u6a21\u578b\uff0c\u4ed6\u53d7\u5230\u4e0d\u540c\u7684\u8cc7\u6599\u7684\u5f71\u97ff\u662f\u6bd4\u8f03\u5c0f\u7684\u3002 \u7c21\u55ae\u7684\u6a21\u578b\u6709\u5927\u7684 bias\uff0c\u5c0f\u7684 variance\u3002 Error \u4f86\u81ea\u65bc bias \u5f88\u5927\uff0c\u7a31\u70ba\u6b20\u64ec\u5408\u3002","title":"Error from 
Bias"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#error-from-variance","text":"\u65b9\u5dee(variance)\u662f\u6307\u4f60\u7684\u6a21\u578b\u5c0d\u65bc\u8cc7\u6599\u96c6\u7684\u654f\u611f\u7a0b\u5ea6\u3002\u4e00\u500b\u904e\u65bc\u8907\u96dc\u7684\u6a21\u578b\u6703\u5c0e\u81f4\u8f38\u51fa\u7684\u8b8a\u7570\u6027\u975e\u5e38\u5927\u3002\u6a21\u578b\u6b7b\u80cc\u6240\u6709\u8a13\u7df4\u96c6\u4e2d\u7684\u6578\u64da\u9ede\u6703\u5c0e\u81f4\u4e00\u500b\u554f\u984c\u767c\u751f\u3002\u7576\u4f60\u7684\u8a13\u7df4\u8cc7\u6599\u6709\u9700\u591a\u7684\u96a8\u6a5f\u8aa4\u5dee\u6216\u662f\u96e2\u7fa4\u503c\u6642\uff0c\u6211\u5011\u53c8\u628a\u9019\u4e9b\u7570\u5e38\u503c\u5168\u90e8\u64ec\u5408\u9032\u6a21\u578b\u88e1\u9762\uff0c\u5c0e\u81f4\u5b78\u51fa\u4f86\u7684\u6a21\u578b\u904e\u65bc\u8907\u96dc\u540c\u6642\u964d\u4f4e\u6cdb\u5316\u80fd\u529b\uff0c\u5c0d\u65bc\u672a\u77e5\u7684\u8cc7\u6599\u9810\u6e2c\u7684\u80fd\u529b\u5c31\u6703\u5f88\u5dee\uff0c\u540c\u6642\u9020\u5c31\u4e86\u5f88\u9ad8\u7684 variance error\u3002\u56e0\u6b64\u9019\u6a23\u7684\u7d50\u679c\u6211\u5011\u7a31\u70ba\u904e\u5ea6\u64ec\u5408\u3002 \u8f03\u8907\u96dc\u7684\u6a21\u578b\u6709\u5c0f\u7684 bias\uff0c\u5927\u7684 variance\u3002 Error \u4f86\u81ea\u65bc variance \u5f88\u5927\uff0c\u7a31\u70ba\u904e\u5ea6\u64ec\u5408\u3002","title":"Error from Variance"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#_4","text":"\u901a\u5e38 bias 
\u5927\u800c\u5c0e\u81f4\u6a21\u578b\u904e\u65bc\u7c21\u55ae\uff0c\u800c\u7121\u6cd5\u64ec\u5408\u8a13\u7df4\u8cc7\u6599\u3002\u6211\u5011\u53ef\u4ee5\u8a66\u8457\u589e\u52a0\u8f38\u5165\u7684\u7279\u5fb5\uff0c\u4e26\u505a\u4e00\u4e9b\u7279\u5fb5\u5de5\u7a0b\u8b93\u6a21\u578b\u89c0\u5bdf\u591a\u9ede\u7dda\u7d22\u3002\u6216\u662f\u8abf\u6574\u6a21\u578b\u7684\u6f14\u7b97\u6cd5\uff0c\u4f7f\u6a21\u578b\u66f4\u8907\u96dc\u3002\u4f8b\u5982\u4f7f\u7528\u9805\u6b21\u66f4\u9ad8\u7684\u591a\u9805\u5f0f\u6a21\u578b\uff0c\u6216\u662f tree-based \u6a21\u578b\u4e2d\u9069\u7576\u7684\u589e\u52a0\u6a39\u7684\u6df1\u5ea6......\u7b49\u3002\u9019\u88cf\u66f4\u503c\u5f97\u4e00\u63d0\u7684\u662f\uff0c\u7576\u6a21\u578b\u6b20\u64ec\u5408\u6642\u641c\u96c6\u518d\u591a\u7684\u8a13\u7df4\u8cc7\u6599\u662f\u6c92\u6709\u7528\u7684\u3002\u56e0\u70ba\u7c21\u55ae\u7684\u6a21\u578b\u6bd4\u8f03\u4e0d\u6703\u53d7\u8cc7\u6599\u7684\u5f71\u97ff\uff0c\u6240\u4ee5 variance \u76f8\u5c0d\u7684\u6703\u6bd4\u8f03\u4f4e\u800c bias \u5927\uff0c\u4e5f\u5c31\u662f\u8f38\u51fa\u7684\u8b8a\u5316\u6027\u4e0d\u5927\u3002\u5f9e\u9019\u88e1\u6211\u5011\u53ef\u4ee5\u5f97\u77e5\u7c21\u55ae\u7684\u6a21\u578b\u53d7\u5230\u4e0d\u540c\u7684\u8f38\u5165\u8cc7\u6599\u53d7\u5230\u7684\u5f71\u97ff\u662f\u6bd4\u8f03\u5c0f\u7684\u3002\u56e0\u70ba\u6a21\u578b\u9078\u5f97\u4e0d\u597d\uff0c\u518d\u600e\u9ebc\u8a13\u7df4\u4ed6\u7684 bias \u9084\u662f\u4e00\u6a23\u5927\u3002 \u589e\u52a0\u8f38\u5165\u7279\u5fb5\u6216\u7279\u5fb5\u5de5\u7a0b 
\u63d0\u9ad8\u6a21\u578b\u8907\u96dc\u5ea6","title":"\u5982\u4f55\u907f\u514d\u6b20\u64ec\u5408\uff1f"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#_5","text":"\u7576\u6a21\u578b\u904e\u65bc\u8907\u96dc\u904e\u5ea6\u64ec\u5408\u767c\u751f\u7684\u6a5f\u7387\u76f8\u5c0d\u63d0\u9ad8\uff0c\u6211\u5011\u53ef\u4ee5\u5f9e\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u89c0\u5bdf\uff0c\u5f88\u5bb9\u6613\u5730\u6aa2\u6e2c\u6a21\u578b\u662f\u5426\u904e\u5ea6\u64ec\u5408\u3002\u4f46\u662f\u6211\u5011\u61c9\u8a72\u5982\u4f55\u907f\u514d\u6a21\u578b\u592a\u904e\u65bc\u8907\u96dc\uff0c\u800c\u5c0e\u81f4\u904e\u5ea6\u64ec\u5408\u767c\u751f\u5462\uff1f\u901a\u5e38\u6211\u5011\u6703\u8a3a\u65b7\u9019\u4e9b\u932f\u8aa4\u7684\u4f86\u6e90\uff0c\u9019\u4e9b\u932f\u8aa4\u4f86\u81ea\u65bc\u5169\u7a2e\uff0c\u5206\u5225\u70ba\u6709 bias \u8207 variance\u3002\u5982\u679c\u6211\u5011\u80fd\u5920\u8a3a\u65b7\u51fa\u9019\u4e9b\u932f\u8aa4\u7684\u4f86\u6e90\uff0c\u6211\u5011\u5c31\u80fd\u6311\u51fa\u9069\u7576\u7684\u65b9\u6cd5\u4f86\u6539\u5584\u6a21\u578b\u3002\u4ee5\u4e0b\u5e7e\u9ede\u6216\u8a31\u80fd\u5920\u5e6b\u52a9\u4f60\u9032\u884c\u5efa\u6a21\uff1a \u641c\u96c6\u66f4\u591a\u8a13\u7df4\u8cc7\u6599 \u589e\u52a0\u8a0a\u7df4\u96c6\u7684\u8cc7\u6599\u91cf\u662f\u6709\u6548\u63a7\u5236 variance \u7684\u65b9\u6cd5\uff0c\u4e26\u4e14\u4e0d\u6703\u589e\u52a0 bias\u3002 \u6a21\u578b\u6dfb\u52a0 Regularization \u5728\u640d\u5931\u51fd\u6578\u4e2d\u589e\u52a0\u4e00\u4e9b\u9650\u5236\u5f0f\uff0c\u964d\u4f4e\u6a21\u578b\u8907\u96dc\u3002 \u4ea4\u53c9\u9a57\u8b49 \u5f9e\u8a13\u7df4\u96c6\u4e2d\u5207\u51fa\u9a57\u8b49\u96c6\uff0c\u4e26\u6311\u51fa\u597d\u7684\u6a21\u578b\u3002\u800c\u4e0d\u662f\u5f9e\u6e2c\u8a66\u96c6\u4e2d\u6c42\u6700\u5c0f error\u3002 Early Stopping \u8a2d\u5b9a\u7576\u6a21\u578b\u9023\u7e8c\u5e7e\u5e36\u90fd\u7121\u6cd5\u6539\u5584 error\uff0c\u5c31\u7acb\u5373\u7d42\u6b62\u8a13\u7df4\u3002 Ensembling 
\u900f\u904e\u8a13\u7df4\u591a\u500b\u6a21\u578b\uff0c\u4e26\u53d6\u5f97\u6bcf\u500b\u6a21\u578b\u9810\u6e2c\u4e26\u5e73\u5747\u4f5c\u70ba\u6700\u7d42\u8f38\u51fa\u3002","title":"\u5982\u4f55\u907f\u514d\u904e\u5ea6\u64ec\u5408\uff1f"},{"location":"24.\u4e0d\u80fd\u5ffd\u8996\u7684\u904e\u64ec\u5408\u8207\u6b20\u64ec\u5408/#reference","text":"Overfitting in Machine Learning: What It Is and How to Prevent It WTF is the Bias-Variance Tradeoff? (Infographic) \u3010\u6a5f\u5668\u5b78\u7fd2\u3011\u504f\u5dee\u8207\u65b9\u5dee\u4e4b\u6b0a\u8861 Bias-Variance Tradeoff \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb/","text":"[Day 25] \u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u5e38\u898b\u7684\u4ea4\u53c9\u9a57\u8b49\u65b9\u6cd5 K-fold Leave one out cross validation Random Subsampling Bootstrap \u524d\u8a00 
\u70ba\u4e86\u907f\u514d\u6a21\u578b\u8a13\u7df4\u767c\u751f\u904e\u5ea6\u64ec\u5408\uff0c\u901a\u5e38\u6211\u5011\u9084\u6703\u5f9e\u8a13\u7df4\u96c6\u5207\u4e00\u5c0f\u90e8\u5206\u8cc7\u6599\u51fa\u4f86\u9032\u884c\u9a57\u8b49\u3002\u9a57\u8b49\u96c6\u7684\u7528\u8655\u5247\u662f\u7528\u4f86\u6aa2\u8996\u6a21\u578b\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u6bcf\u6b21\u7684\u8fed\u4ee3\u7d50\u679c\u8a13\u7df4\u7684\u597d\u4e0d\u597d\u3002\u4f46\u8a72\u5982\u4f55\u5207\u51fa\u9019\u500b\u9a57\u8b49\u96c6\u6bd4\u8f03\u6709\u516c\u4fe1\u529b\u5462\uff1f\u5982\u679c\u6211\u5011\u50c5\u5207\u4e00\u5c0f\u4efd\u7684\u8cc7\u6599\u4ed6\u662f\u80fd\u6709\u6709\u6548\u7684\u8a55\u4f30\u8a13\u7df4\u6642\u6a21\u578b\u7684\u597d\u58de\u55ce\uff1f\u5728\u67d0\u4e9b\u60c5\u6cc1\u5e95\u4e0b\u55ae\u7d14\u76f4\u63a5\u5f9e\u8cc7\u6599\u96c6\u88e1\u9762\u5207\u4e00\u584a\u51fa\u4f86\u7576\u9a57\u8b49\u96c6\uff0c\u662f\u6c92\u6709\u8fa6\u6cd5\u5f88\u6709\u6548\u7684\u53bb\u8a55\u4f30\u4e00\u500b\u6a21\u578b\u8a13\u7df4\u7684\u597d\u58de\u3002\u8aaa\u4e0d\u5b9a\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u5728\u9019\u4e00\u4efd\u9a57\u8b49\u96c6\u6070\u597d\u8868\u73fe\u5f97\u4e0d\u932f\uff0c\u5982\u679c\u53c8\u96a8\u6a5f\u62bd\u53e6\u4e00\u4efd\u8cc7\u6599\u4f86\u7576\u9a57\u8b49\u96c6\u8aaa\u4e0d\u5b9a\u7d50\u679c\u6703\u8b8a\u5f97\u5f88\u7cdf\u7cd5\u3002\u9019\u5c31\u8868\u793a\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u4e0d\u8db3\u3002\u70ba\u4e86\u907f\u514d\u9019\u7a2e\u60c5\u6cc1\u767c\u751f\u4e26\u4e14\u6709\u6548\u7684\u5207\u5272\u9a57\u8b49\u96c6\u4f86\u8a55\u4f30\u6a21\u578b\uff0c\u6211\u5011\u53ef\u4ee5\u63a1\u7528\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7684\u6280\u5de7\u4f86\u7372\u5f97\u6700\u4f73\u9a57\u8b49\u3002 \u4ec0\u9ebc\u662f\u4ea4\u53c9\u9a57\u8b49\uff1f 
\u5728\u89e3\u91cb\u4ea4\u53c9\u9a57\u8b49\u4e4b\u524d\u6211\u5011\u5148\u4f86\u8a0e\u8ad6\u5c07\u8cc7\u6599\u96c6\u5207\u5206\u70ba\u8a13\u7df4\u96c6\u3001\u6e2c\u8a66\u96c6\u548c\u9a57\u8b49\u96c6\u7684\u554f\u984c\u3002\u5728\u4e00\u822c\u72c0\u6cc1\u4e0b\u6211\u5011\u6703\u5c07\u8cc7\u6599\u5148\u5207\u5272\u6210\u5169\u7b49\u4efd\uff0c\u5206\u5225\u70ba\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u3002\u5176\u4e2d\u5728\u8a13\u7df4\u968e\u6bb5\u6a21\u578b\u53ea\u6703\u5c0d\u8a13\u7df4\u96c6\u9032\u884c\u64ec\u5408\uff0c\u53e6\u5916\u6e2c\u8a66\u96c6\u7684\u8cc7\u6599\u4e26\u672a\u53c3\u8207\u8a13\u7df4\uff0c\u56e0\u6b64\u53ef\u4ee5\u62ff\u4f86\u7576\u4f5c\u6700\u7d42\u8a55\u4f30\u6a21\u578b\u7684\u597d\u58de\u3002\u4f46\u662f\u6211\u5011\u8a13\u7df4\u7684\u6a21\u578b\u5e0c\u671b\u627e\u5230\u4e00\u500b\u4e0d\u932f\u7684\u8d85\u53c3\u6578\uff0c\u4f7f\u5f97\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u90fd\u6709\u4e0d\u932f\u7684\u6210\u7e3e\uff0c\u4e5f\u5c31\u662f\u8aaa loss 
\u8981\u8d8a\u4f4e\u8d8a\u597d\u3002\u56e0\u6b64\u6700\u5e38\u898b\u7684\u4f5c\u6cd5\u6703\u5c07\u8a13\u7df4\u8cc7\u6599\u518d\u5207\u51fa\u4e00\u500b\u9a57\u8b49\u96c6\u4f86\u627e\u51fa\u4e00\u500b\u6700\u4f73\u7684\u6a21\u578b\u53c3\u6578\uff0c\u4f7f\u5f97\u9a57\u8b49\u96c6\u7684\u8868\u73fe\u8981\u6700\u597d\u3002\u4f46\u662f\u70ba\u4e86\u907f\u514d\u6a21\u578b\u5c0d\u65bc\u6211\u5011\u6240\u5207\u7684\u9a57\u8b49\u96c6\u904e\u5ea6\u64ec\u5408\uff0c\u56e0\u6b64\u53ef\u5df2\u900f\u904e\u4ea4\u53c9\u9a57\u8b49\u7684\u65b9\u6cd5\u5c0d\u6a21\u578b\u505a\u66f4\u597d\u7684\u8a55\u4f30\u3002\u6240\u8b02\u7684\u4ea4\u53c9\u9a57\u8b49\u7c21\u55ae\u4f86\u8aaa\u662f\u5c07\u8a13\u7df4\u8cc7\u6599\u9032\u884c\u5206\u7d44\uff0c\u4e00\u90e8\u5206\u505a\u70ba\u8a13\u7df4\u5b50\u96c6\u4f86\u8a13\u7df4\u6a21\u578b\uff0c\u53e6\u4e00\u90e8\u5206\u505a\u70ba\u9a57\u8b49\u5b50\u96c6\u4f86\u8a55\u4f30\u6a21\u578b\u3002\u7528\u8a13\u7df4\u5b50\u96c6\u7684\u6578\u64da\u5148\u8a13\u7df4\u6a21\u578b\uff0c\u7136\u5f8c\u7528\u9a57\u8b49\u5b50\u96c6\u53bb\u8dd1\u4e00\u904d\uff0c\u770b\u9a57\u8b49\u96c6\u7684\u640d\u5931\u51fd\u6578(loss)\u6216\u662f\u5206\u985e\u6e96\u78ba\u7387\u7b49\u3002\u7b49\u6a21\u578b\u8a13\u7df4\u597d\u4e4b\u5f8c\uff0c\u518d\u7528\u6e2c\u8a66\u96c6\u53bb\u6e2c\u8a66\u6a21\u578b\u7684\u6027\u80fd\u3002\u4e3b\u8981\u7684\u4ea4\u53c9\u9a57\u8b49\u6cd5\u6709\u4ee5\u4e0b\u5e7e\u500b\u65b9\u6cd5: Holdout K-fold Leave one out cross validation Random Subsampling Bootstrap Holdout Method \u6b64\u65b9\u6cd5\u662f\u6700\u7d93\u5178\u4e14\u6700\u7c21\u55ae\u5be6\u4f5c\u7684\u4ea4\u53c9\u9a57\u8b49\u6cd5\uff0cHoldout 
\u9867\u540d\u601d\u7fa9\u5c31\u662f\u5c07\u8cc7\u6599\u5207\u51fa\u4e00\u90e8\u5206\u4f5c\u70ba\u6a21\u578b\u8a55\u4f30\u7684\u4f9d\u64da\u3002\u5728\u9019\u7a2e\u65b9\u6cd5\u4e2d\uff0c\u6211\u5011\u5c07\u8cc7\u6599\u96a8\u6a5f\u5206\u70ba\u4e09\u90e8\u5206\uff1a\u8a13\u7df4\u96c6\u3001\u9a57\u8b49\u96c6\u548c\u6e2c\u8a66\u96c6\u3002\u5176\u4e2d\u53ea\u6709\u8a13\u7df4\u96c6\u8cc7\u6599\u5be6\u969b\u53c3\u8207\u8a13\u7df4\uff0c\u5176\u9918\u7684\u8cc7\u6599\u50c5\u62ff\u4f86\u8a55\u4f30\u6a21\u578b\u597d\u58de\u3002\u9a57\u8b49\u96c6\u4f7f\u7528\u6642\u6a5f\u662f\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u53ef\u4ee5\u6aa2\u8996\u8a13\u7df4\u7684\u8da8\u52e2\uff0c\u82e5\u6709\u767c\u73fe\u904e\u64ec\u5408\u64ec\u5408\u8de1\u8c61\u53ef\u4ee5\u63d0\u65e9\u767c\u73fe\u4e26\u89e3\u6c7a\u3002\u4ee5\u53ca\u65b9\u4fbf\u6211\u5011\u9032\u884c\u8abf\u6574\u8d85\u53c3\u6578\u4ee5\u53ca\u9078\u64c7\u6700\u4f73\u7684\u6a21\u578b\u3002\u7576\u7136\u50c5\u900f\u904e\u9a57\u8b49\u96c6\u4e0d\u80fd\u4ee3\u8868\u5168\u90e8\uff0c\u56e0\u6b64\u6700\u5f8c\u78ba\u5b9a\u597d\u6a21\u578b\u6642\u3002\u6211\u5011\u6703\u518d\u62ff\u4e8b\u5148\u5207\u597d\u7684\u6e2c\u8a66\u96c6\u9032\u884c\u6700\u7d42\u7684\u8a55\u4f30\uff0c\u6aa2\u8996\u6a21\u578b\u7684\u6cdb\u5316\u80fd\u529b\u3002 \u53c3\u8003 \u512a\u9ede : \u7c21\u55ae\u5be6\u4f5c\u3002 \u9a57\u8b49\u96c6\u53ef\u4ee5\u88ab\u62ff\u4f86\u8a55\u4f30\u6a21\u578b\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u7684\u5b78\u7fd2\u6210\u679c\u3002 \u6e2c\u8a66\u96c6\u53ef\u4ee5\u8a55\u4f30\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u3002 \u7f3a\u9ede : \u7576\u8cc7\u6599\u96c6\u8b8a\u7570\u91cf\u8f03\u5927\u6642\uff0c\u9a57\u8b49\u96c6\u8207\u6e2c\u8a66\u96c6\u53ef\u80fd\u7121\u6cd5\u8db3\u4ee5\u8a55\u4f30\u6a21\u578b\u3002 \u4e0d\u9069\u5408\u7528\u5728\u8cc7\u6599\u4e0d\u5e73\u8861\u7684\u8cc7\u6599\u96c6\u3002 K-fold Cross-Validation 
\u4e0a\u4e00\u500b\u65b9\u6cd5\u96d6\u7136\u7c21\u55ae\uff0c\u4f46\u662f\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u50c5\u5207\u4e00\u4efd\u9a57\u8b49\u96c6\u5f80\u5f80\u4e0d\u80fd\u5920\u4ee3\u8868\u5168\u90e8\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u900f\u904e\u4e00\u4e9b\u6280\u5de7\u5207\u5272\u9a57\u8b49\u96c6\uff0c\u4f7f\u5f97\u8a13\u7df4\u904e\u7a0b\u4e2d\u6709\u4e00\u500b\u66f4\u516c\u6b63\u7684\u8a55\u4f30\u65b9\u5f0f\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e K-Fold \u65b9\u6cd5\u5c07\u8a13\u7df4\u8cc7\u6599\u518d\u4f9d\u5e8f\u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\uff0cK-Fold \u88e1\u9762\u7684\u6e2c\u8a66\u96c6\u53ef\u4ee5\u7576\u6210\u9a57\u8b49\u96c6\u3002K-Fold \u7684\u65b9\u6cd5\u4e2d K \u662f\u7531\u6211\u5011\u81ea\u7531\u8abf\u63a7\u7684\uff0c\u5728\u6bcf\u6b21\u7684\u8fed\u4ee3\u4e2d\u6703\u9078\u64c7\u4e00\u7d44\u4f5c\u70ba\u9a57\u8b49\u96c6\uff0c\u5176\u9918 (k-1) \u7d44\u4f5c\u70ba\u8a13\u7df4\u96c6\u3002\u900f\u904e\u9019\u7a2e\u65b9\u5f0f\u5b78\u7fd2\uff0c\u4e0d\u540c\u5206\u7d44\u8a13\u7df4\u7684\u7d50\u679c\u9032\u884c\u5e73\u5747\u4f86\u6e1b\u5c11\u65b9\u5dee\uff0c\u56e0\u6b64\u6a21\u578b\u7684\u6027\u80fd\u5c0d\u6578\u64da\u7684\u5283\u5206\u5c31\u4e0d\u6703\u90a3\u9ebc\u654f\u611f\u3002 \u53c3\u8003 \u512a\u9ede : \u964d\u4f4e\u6a21\u578b\u8a13\u7df4\u5c0d\u65bc\u8cc7\u6599\u96c6\u7684\u504f\u5dee\u3002 \u8a13\u7df4\u96c6\u8207\u9a57\u8b49\u96c6\u5b8c\u6574\u88ab\u5145\u5206\u5229\u7528\u8207\u5b78\u7fd2\u3002 \u7f3a\u9ede : \u4e0d\u9069\u5408\u7528\u65bc\u8cc7\u6599\u4e0d\u5e73\u8861\u7684\u8cc7\u6599\u96c6\u3002 \u5982\u679c\u8981\u7c21\u55ae\u7684 K-fold \u4f86\u5c0b\u627e\u8d85\u53c3\u6578\u6703\u6709\u8cc7\u6599\u6d29\u6f0f\u554f\u984c\u5c0e\u81f4\u8a13\u7df4\u7d50\u679c\u6709\u504f\u5dee\uff0c\u56e0\u70ba\u5728\u6bcf\u500b Fold \u4e2d\u90fd\u6703\u4f7f\u7528\u540c\u4e00\u7d44\u8cc7\u6599\u9032\u884c\u9a57\u8b49\u3002 
\u5728\u76f8\u540c\u7684\u9a57\u8b49\u96c6\u8a08\u7b97\u6a21\u578b\u7684\u8aa4\u5dee\uff0c\u7576\u627e\u5230\u4e86\u6700\u4f73\u7684\u8d85\u53c3\u6578\u3002\u9019\u53ef\u80fd\u6703\u5c0e\u81f4\u91cd\u5927\u504f\u5dee\uff0c\u6709\u904e\u64ec\u5408\u64ec\u5408\u7591\u616e\u3002 Leave One Out \u6b64\u65b9\u6cd5\u662f K-fold \u5176\u4e2d\u4e00\u7a2e\u7279\u4f8b\uff0c\u7576 K \u7b49\u65bc\u8cc7\u6599\u96c6\u7684\u6578\u91cf\u6642\u5c31\u7b49\u65bc Leave One Out \u65b9\u6cd5\u3002\u4e5f\u5c31\u662f\u5728\u6bcf\u6b21\u8a13\u7df4\u6642\u50c5\u6703\u628a\u4e00\u7b46\u8cc7\u6599\u7576\u6210\u6e2c\u8a66\u8cc7\u6599\uff0c\u5176\u9918\u7684 N-1 \u7b46\u8cc7\u6599\u4f5c\u70ba\u8a13\u7df4\u6a21\u578b\u7684\u8cc7\u6599\u3002\u6b64\u4f5c\u6cd5\u76f8\u7576\u7c21\u55ae\u660e\u77ad\uff0c\u4f46\u662f\u8a13\u7df4\u8ca0\u64d4\u6703\u975e\u5e38\u91cd\u4e14\u8017\u6642\u3002\u7136\u800c Leave p-out \u662f\u53e6\u4e00\u7a2e\u6280\u5de7\uff0c\u5176\u4e2d\u7684 p \u4f7f\u7528\u8005\u53ef\u4ee5\u81ea\u5df1\u8a2d\u5b9a\u6bcf\u6b21\u8a13\u7df4\u9700\u8981\u7559\u5e7e\u7b46\u8cc7\u6599\u4f5c\u70ba\u6e2c\u8a66\u96c6\u3002 \u512a\u9ede : \u7c21\u55ae\u4e14\u5bb9\u6613\u7406\u89e3\uff0c\u597d\u5be6\u4f5c\u3002 \u7f3a\u9ede : \u9700\u8981\u82b1\u8cbb\u66f4\u591a\u7684\u8a13\u7df4\u6642\u9593\u3002 Random Subsampling Random Subsampling 
\u65b9\u6cd5\u662f\u4e00\u7a2e\u7c21\u55ae\u4e14\u5e38\u7528\u7684\u4ea4\u53c9\u9a57\u8b49\u6280\u8853\uff0c\u5b83\u900f\u904e\u591a\u6b21\u96a8\u6a5f\u62bd\u6a23\u5c07\u8cc7\u6599\u96c6\u5207\u5272\u6210\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u3002\u6bcf\u6b21\u96a8\u6a5f\u5206\u5272\u6642\uff0c\u6e2c\u8a66\u96c6\u7684\u6bd4\u4f8b\u56fa\u5b9a\uff0c\u800c\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u5247\u96a8\u6a5f\u9078\u53d6\u3002\u9019\u7a2e\u65b9\u6cd5\u4e3b\u8981\u900f\u904e\u591a\u6b21\u96a8\u6a5f\u6e2c\u8a66\u4e0d\u540c\u7684\u8cc7\u6599\u5207\u5206\u65b9\u5f0f\u4f86\u8a55\u4f30\u6a21\u578b\u6027\u80fd\uff0c\u6700\u5f8c\u53d6\u6e2c\u8a66\u7d50\u679c\u7684\u5e73\u5747\u503c\u3002 \u512a\u9ede : \u591a\u6b21\u96a8\u6a5f\u62bd\u6a23\u80fd\u6e1b\u5c11\u8cc7\u6599\u5283\u5206\u7684\u504f\u5dee\u3002 \u53ef\u4ee5\u9748\u6d3b\u9078\u64c7\u8a13\u7df4\u548c\u6e2c\u8a66\u96c6\u7684\u6bd4\u4f8b\u3002 \u7f3a\u9ede : \u591a\u6b21\u91cd\u8907\u62bd\u6a23\u9700\u8981\u8f03\u5927\u7684\u8a08\u7b97\u8cc7\u6e90\u3002 \u6bcf\u6b21\u96a8\u6a5f\u62bd\u6a23\u7684\u8cc7\u6599\u96c6\u53ef\u80fd\u6703\u6709\u6240\u4e0d\u540c\uff0c\u7d50\u679c\u4e0d\u7a69\u5b9a\u3002 Bootstrapping \u9084\u6709\u4e00\u7a2e\u6bd4\u8f03\u7279\u6b8a\u7684\u4ea4\u53c9\u9a57\u8b49\u65b9\u5f0f\uff0cBootstrapping 
\u81ea\u52a9\u62bd\u6a23\u6cd5\u3002\u662f\u4e00\u7a2e\u5f9e\u7d66\u5b9a\u8a13\u7df4\u96c6\u4e2d\u6709\u653e\u56de\u7684\u5747\u52fb\u62bd\u6a23\uff0c\u4e5f\u5c31\u662f\u8aaa\uff0c\u6bcf\u7576\u9078\u4e2d\u4e00\u500b\u6a23\u672c\uff0c\u5b83\u7b49\u53ef\u80fd\u5730\u88ab\u518d\u6b21\u9078\u4e2d\u4e26\u88ab\u518d\u6b21\u6dfb\u52a0\u5230\u8a13\u7df4\u96c6\u4e2d\u3002\u5047\u8a2d\u6bcf\u6b21\u8a13\u7df4\u90fd\u63a1\u6a23\u5341\u500b\u6a23\u672c\uff0c\u5728\u9019\u5341\u7b46\u8cc7\u6599\u4e2d\u5f88\u6709\u53ef\u80fd\u6703\u518d\u6b21\u88ab\u96a8\u6a5f\u62bd\u5230\u3002\u5269\u4e0b\u6c92\u6709\u62bd\u5230\u7684\u8cc7\u6599\u5247\u90fd\u8b8a\u6210\u6e2c\u8a66\u96c6\uff0c\u7528\u4f86\u8a55\u4f30\u8a13\u7df4\u5b8c\u7684\u6a21\u578b\u3002 \u512a\u9ede : \u80fd\u5728\u5c0f\u6578\u64da\u96c6\u7684\u60c5\u6cc1\u4e0b\u63d0\u9ad8\u6a21\u578b\u7a69\u5b9a\u6027\u3002 \u53ef\u91cd\u8907\u5229\u7528\u76f8\u540c\u7684\u6578\u64da\u4f86\u9032\u884c\u591a\u6b21\u8a13\u7df4\u3002 \u7f3a\u9ede : \u91cd\u8907\u6a23\u672c\u53ef\u80fd\u5c0e\u81f4\u6a21\u578b\u904e\u64ec\u5408\u3002 \u6e2c\u8a66\u96c6\u4e2d\u8cc7\u6599\u91cf\u8f03\u5c11\uff0c\u53ef\u80fd\u5c0e\u81f4\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u8a55\u4f30\u4e0d\u5920\u5145\u5206\u3002 \u5c0f\u7d50 
\u4ea4\u53c9\u9a57\u8b49\u662f\u8a13\u7df4\u6a21\u578b\u4e2d\u975e\u5e38\u91cd\u8981\u7684\u6280\u5de7\uff0c\u5c24\u5176\u662f\u7576\u624b\u908a\u7684\u8cc7\u6599\u96c6\u6709\u9650\u6642\u66f4\u61c9\u8a72\u4f7f\u7528\u3002\u900f\u904e\u4ea4\u53c9\u9a57\u8b49\u6280\u5de7\uff0c\u5373\u4f7f\u5728\u6578\u64da\u6709\u9650\u7684\u60c5\u6cc1\u4e0b\uff0c\u6211\u5011\u4e5f\u80fd\u5920\u7372\u5f97\u6e96\u78ba\u7684\u7d50\u679c\uff0c\u4e26\u4e14\u53ef\u4ee5\u907f\u514d\u6a21\u578b\u904e\u5ea6\u64ec\u5408\u3002\u4e26\u70ba\u6211\u5011\u63d0\u4f9b\u66f4\u6e96\u78ba\u7684\u6a21\u578b\u9810\u6e2c\u6027\u80fd\u4f30\u8a08\u65b9\u5f0f\uff0c\u540c\u6642\u4e5f\u80fd\u5920\u63d0\u5347\u6a21\u578b\u7684\u6cdb\u5316\u80fd\u529b\u3002\u4ee5\u4e0a\u7684\u65b9\u6cd5\u53ef\u4ee5\u76f4\u63a5\u4f7f\u7528 scikit-learn \u88e1\u9762 model_selection \u5e95\u4e0b\u7684 cross_val_score \u65b9\u6cd5\u9032\u884c\u5be6\u4f5c\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 25] \u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb/#day-25-cross-validation","text":"","title":"[Day 25] \u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb/#_1","text":"\u5e38\u898b\u7684\u4ea4\u53c9\u9a57\u8b49\u65b9\u6cd5 K-fold Leave one out cross validation Random Subsampling Bootstrap","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation 
\u7c21\u4ecb/#_2","text":"\u70ba\u4e86\u907f\u514d\u6a21\u578b\u8a13\u7df4\u767c\u751f\u904e\u5ea6\u64ec\u5408\uff0c\u901a\u5e38\u6211\u5011\u9084\u6703\u5f9e\u8a13\u7df4\u96c6\u5207\u4e00\u5c0f\u90e8\u5206\u8cc7\u6599\u51fa\u4f86\u9032\u884c\u9a57\u8b49\u3002\u9a57\u8b49\u96c6\u7684\u7528\u8655\u5247\u662f\u7528\u4f86\u6aa2\u8996\u6a21\u578b\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u6bcf\u6b21\u7684\u8fed\u4ee3\u7d50\u679c\u8a13\u7df4\u7684\u597d\u4e0d\u597d\u3002\u4f46\u8a72\u5982\u4f55\u5207\u51fa\u9019\u500b\u9a57\u8b49\u96c6\u6bd4\u8f03\u6709\u516c\u4fe1\u529b\u5462\uff1f\u5982\u679c\u6211\u5011\u50c5\u5207\u4e00\u5c0f\u4efd\u7684\u8cc7\u6599\u4ed6\u662f\u80fd\u6709\u6709\u6548\u7684\u8a55\u4f30\u8a13\u7df4\u6642\u6a21\u578b\u7684\u597d\u58de\u55ce\uff1f\u5728\u67d0\u4e9b\u60c5\u6cc1\u5e95\u4e0b\u55ae\u7d14\u76f4\u63a5\u5f9e\u8cc7\u6599\u96c6\u88e1\u9762\u5207\u4e00\u584a\u51fa\u4f86\u7576\u9a57\u8b49\u96c6\uff0c\u662f\u6c92\u6709\u8fa6\u6cd5\u5f88\u6709\u6548\u7684\u53bb\u8a55\u4f30\u4e00\u500b\u6a21\u578b\u8a13\u7df4\u7684\u597d\u58de\u3002\u8aaa\u4e0d\u5b9a\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u5728\u9019\u4e00\u4efd\u9a57\u8b49\u96c6\u6070\u597d\u8868\u73fe\u5f97\u4e0d\u932f\uff0c\u5982\u679c\u53c8\u96a8\u6a5f\u62bd\u53e6\u4e00\u4efd\u8cc7\u6599\u4f86\u7576\u9a57\u8b49\u96c6\u8aaa\u4e0d\u5b9a\u7d50\u679c\u6703\u8b8a\u5f97\u5f88\u7cdf\u7cd5\u3002\u9019\u5c31\u8868\u793a\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u4e0d\u8db3\u3002\u70ba\u4e86\u907f\u514d\u9019\u7a2e\u60c5\u6cc1\u767c\u751f\u4e26\u4e14\u6709\u6548\u7684\u5207\u5272\u9a57\u8b49\u96c6\u4f86\u8a55\u4f30\u6a21\u578b\uff0c\u6211\u5011\u53ef\u4ee5\u63a1\u7528\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7684\u6280\u5de7\u4f86\u7372\u5f97\u6700\u4f73\u9a57\u8b49\u3002","title":"\u524d\u8a00"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation 
\u7c21\u4ecb/#_3","text":"\u5728\u89e3\u91cb\u4ea4\u53c9\u9a57\u8b49\u4e4b\u524d\u6211\u5011\u5148\u4f86\u8a0e\u8ad6\u5c07\u8cc7\u6599\u96c6\u5207\u5206\u70ba\u8a13\u7df4\u96c6\u3001\u6e2c\u8a66\u96c6\u548c\u9a57\u8b49\u96c6\u7684\u554f\u984c\u3002\u5728\u4e00\u822c\u72c0\u6cc1\u4e0b\u6211\u5011\u6703\u5c07\u8cc7\u6599\u5148\u5207\u5272\u6210\u5169\u7b49\u4efd\uff0c\u5206\u5225\u70ba\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u3002\u5176\u4e2d\u5728\u8a13\u7df4\u968e\u6bb5\u6a21\u578b\u53ea\u6703\u5c0d\u8a13\u7df4\u96c6\u9032\u884c\u64ec\u5408\uff0c\u53e6\u5916\u6e2c\u8a66\u96c6\u7684\u8cc7\u6599\u4e26\u672a\u53c3\u8207\u8a13\u7df4\uff0c\u56e0\u6b64\u53ef\u4ee5\u62ff\u4f86\u7576\u4f5c\u6700\u7d42\u8a55\u4f30\u6a21\u578b\u7684\u597d\u58de\u3002\u4f46\u662f\u6211\u5011\u8a13\u7df4\u7684\u6a21\u578b\u5e0c\u671b\u627e\u5230\u4e00\u500b\u4e0d\u932f\u7684\u8d85\u53c3\u6578\uff0c\u4f7f\u5f97\u6a21\u578b\u5728\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u90fd\u6709\u4e0d\u932f\u7684\u6210\u7e3e\uff0c\u4e5f\u5c31\u662f\u8aaa loss 
\u8981\u8d8a\u4f4e\u8d8a\u597d\u3002\u56e0\u6b64\u6700\u5e38\u898b\u7684\u4f5c\u6cd5\u6703\u5c07\u8a13\u7df4\u8cc7\u6599\u518d\u5207\u51fa\u4e00\u500b\u9a57\u8b49\u96c6\u4f86\u627e\u51fa\u4e00\u500b\u6700\u4f73\u7684\u6a21\u578b\u53c3\u6578\uff0c\u4f7f\u5f97\u9a57\u8b49\u96c6\u7684\u8868\u73fe\u8981\u6700\u597d\u3002\u4f46\u662f\u70ba\u4e86\u907f\u514d\u6a21\u578b\u5c0d\u65bc\u6211\u5011\u6240\u5207\u7684\u9a57\u8b49\u96c6\u904e\u5ea6\u64ec\u5408\uff0c\u56e0\u6b64\u53ef\u5df2\u900f\u904e\u4ea4\u53c9\u9a57\u8b49\u7684\u65b9\u6cd5\u5c0d\u6a21\u578b\u505a\u66f4\u597d\u7684\u8a55\u4f30\u3002\u6240\u8b02\u7684\u4ea4\u53c9\u9a57\u8b49\u7c21\u55ae\u4f86\u8aaa\u662f\u5c07\u8a13\u7df4\u8cc7\u6599\u9032\u884c\u5206\u7d44\uff0c\u4e00\u90e8\u5206\u505a\u70ba\u8a13\u7df4\u5b50\u96c6\u4f86\u8a13\u7df4\u6a21\u578b\uff0c\u53e6\u4e00\u90e8\u5206\u505a\u70ba\u9a57\u8b49\u5b50\u96c6\u4f86\u8a55\u4f30\u6a21\u578b\u3002\u7528\u8a13\u7df4\u5b50\u96c6\u7684\u6578\u64da\u5148\u8a13\u7df4\u6a21\u578b\uff0c\u7136\u5f8c\u7528\u9a57\u8b49\u5b50\u96c6\u53bb\u8dd1\u4e00\u904d\uff0c\u770b\u9a57\u8b49\u96c6\u7684\u640d\u5931\u51fd\u6578(loss)\u6216\u662f\u5206\u985e\u6e96\u78ba\u7387\u7b49\u3002\u7b49\u6a21\u578b\u8a13\u7df4\u597d\u4e4b\u5f8c\uff0c\u518d\u7528\u6e2c\u8a66\u96c6\u53bb\u6e2c\u8a66\u6a21\u578b\u7684\u6027\u80fd\u3002\u4e3b\u8981\u7684\u4ea4\u53c9\u9a57\u8b49\u6cd5\u6709\u4ee5\u4e0b\u5e7e\u500b\u65b9\u6cd5: Holdout K-fold Leave one out cross validation Random Subsampling Bootstrap","title":"\u4ec0\u9ebc\u662f\u4ea4\u53c9\u9a57\u8b49\uff1f"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb/#holdout-method","text":"\u6b64\u65b9\u6cd5\u662f\u6700\u7d93\u5178\u4e14\u6700\u7c21\u55ae\u5be6\u4f5c\u7684\u4ea4\u53c9\u9a57\u8b49\u6cd5\uff0cHoldout 
\u9867\u540d\u601d\u7fa9\u5c31\u662f\u5c07\u8cc7\u6599\u5207\u51fa\u4e00\u90e8\u5206\u4f5c\u70ba\u6a21\u578b\u8a55\u4f30\u7684\u4f9d\u64da\u3002\u5728\u9019\u7a2e\u65b9\u6cd5\u4e2d\uff0c\u6211\u5011\u5c07\u8cc7\u6599\u96a8\u6a5f\u5206\u70ba\u4e09\u90e8\u5206\uff1a\u8a13\u7df4\u96c6\u3001\u9a57\u8b49\u96c6\u548c\u6e2c\u8a66\u96c6\u3002\u5176\u4e2d\u53ea\u6709\u8a13\u7df4\u96c6\u8cc7\u6599\u5be6\u969b\u53c3\u8207\u8a13\u7df4\uff0c\u5176\u9918\u7684\u8cc7\u6599\u50c5\u62ff\u4f86\u8a55\u4f30\u6a21\u578b\u597d\u58de\u3002\u9a57\u8b49\u96c6\u4f7f\u7528\u6642\u6a5f\u662f\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u53ef\u4ee5\u6aa2\u8996\u8a13\u7df4\u7684\u8da8\u52e2\uff0c\u82e5\u6709\u767c\u73fe\u904e\u64ec\u5408\u64ec\u5408\u8de1\u8c61\u53ef\u4ee5\u63d0\u65e9\u767c\u73fe\u4e26\u89e3\u6c7a\u3002\u4ee5\u53ca\u65b9\u4fbf\u6211\u5011\u9032\u884c\u8abf\u6574\u8d85\u53c3\u6578\u4ee5\u53ca\u9078\u64c7\u6700\u4f73\u7684\u6a21\u578b\u3002\u7576\u7136\u50c5\u900f\u904e\u9a57\u8b49\u96c6\u4e0d\u80fd\u4ee3\u8868\u5168\u90e8\uff0c\u56e0\u6b64\u6700\u5f8c\u78ba\u5b9a\u597d\u6a21\u578b\u6642\u3002\u6211\u5011\u6703\u518d\u62ff\u4e8b\u5148\u5207\u597d\u7684\u6e2c\u8a66\u96c6\u9032\u884c\u6700\u7d42\u7684\u8a55\u4f30\uff0c\u6aa2\u8996\u6a21\u578b\u7684\u6cdb\u5316\u80fd\u529b\u3002 \u53c3\u8003 \u512a\u9ede : \u7c21\u55ae\u5be6\u4f5c\u3002 \u9a57\u8b49\u96c6\u53ef\u4ee5\u88ab\u62ff\u4f86\u8a55\u4f30\u6a21\u578b\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u7684\u5b78\u7fd2\u6210\u679c\u3002 \u6e2c\u8a66\u96c6\u53ef\u4ee5\u8a55\u4f30\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u3002 \u7f3a\u9ede : \u7576\u8cc7\u6599\u96c6\u8b8a\u7570\u91cf\u8f03\u5927\u6642\uff0c\u9a57\u8b49\u96c6\u8207\u6e2c\u8a66\u96c6\u53ef\u80fd\u7121\u6cd5\u8db3\u4ee5\u8a55\u4f30\u6a21\u578b\u3002 \u4e0d\u9069\u5408\u7528\u5728\u8cc7\u6599\u4e0d\u5e73\u8861\u7684\u8cc7\u6599\u96c6\u3002","title":"Holdout Method"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation 
\u7c21\u4ecb/#k-fold-cross-validation","text":"\u4e0a\u4e00\u500b\u65b9\u6cd5\u96d6\u7136\u7c21\u55ae\uff0c\u4f46\u662f\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u50c5\u5207\u4e00\u4efd\u9a57\u8b49\u96c6\u5f80\u5f80\u4e0d\u80fd\u5920\u4ee3\u8868\u5168\u90e8\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u900f\u904e\u4e00\u4e9b\u6280\u5de7\u5207\u5272\u9a57\u8b49\u96c6\uff0c\u4f7f\u5f97\u8a13\u7df4\u904e\u7a0b\u4e2d\u6709\u4e00\u500b\u66f4\u516c\u6b63\u7684\u8a55\u4f30\u65b9\u5f0f\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e K-Fold \u65b9\u6cd5\u5c07\u8a13\u7df4\u8cc7\u6599\u518d\u4f9d\u5e8f\u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\uff0cK-Fold \u88e1\u9762\u7684\u6e2c\u8a66\u96c6\u53ef\u4ee5\u7576\u6210\u9a57\u8b49\u96c6\u3002K-Fold \u7684\u65b9\u6cd5\u4e2d K \u662f\u7531\u6211\u5011\u81ea\u7531\u8abf\u63a7\u7684\uff0c\u5728\u6bcf\u6b21\u7684\u8fed\u4ee3\u4e2d\u6703\u9078\u64c7\u4e00\u7d44\u4f5c\u70ba\u9a57\u8b49\u96c6\uff0c\u5176\u9918 (k-1) \u7d44\u4f5c\u70ba\u8a13\u7df4\u96c6\u3002\u900f\u904e\u9019\u7a2e\u65b9\u5f0f\u5b78\u7fd2\uff0c\u4e0d\u540c\u5206\u7d44\u8a13\u7df4\u7684\u7d50\u679c\u9032\u884c\u5e73\u5747\u4f86\u6e1b\u5c11\u65b9\u5dee\uff0c\u56e0\u6b64\u6a21\u578b\u7684\u6027\u80fd\u5c0d\u6578\u64da\u7684\u5283\u5206\u5c31\u4e0d\u6703\u90a3\u9ebc\u654f\u611f\u3002 \u53c3\u8003 \u512a\u9ede : \u964d\u4f4e\u6a21\u578b\u8a13\u7df4\u5c0d\u65bc\u8cc7\u6599\u96c6\u7684\u504f\u5dee\u3002 \u8a13\u7df4\u96c6\u8207\u9a57\u8b49\u96c6\u5b8c\u6574\u88ab\u5145\u5206\u5229\u7528\u8207\u5b78\u7fd2\u3002 \u7f3a\u9ede : \u4e0d\u9069\u5408\u7528\u65bc\u8cc7\u6599\u4e0d\u5e73\u8861\u7684\u8cc7\u6599\u96c6\u3002 \u5982\u679c\u8981\u7c21\u55ae\u7684 K-fold \u4f86\u5c0b\u627e\u8d85\u53c3\u6578\u6703\u6709\u8cc7\u6599\u6d29\u6f0f\u554f\u984c\u5c0e\u81f4\u8a13\u7df4\u7d50\u679c\u6709\u504f\u5dee\uff0c\u56e0\u70ba\u5728\u6bcf\u500b Fold \u4e2d\u90fd\u6703\u4f7f\u7528\u540c\u4e00\u7d44\u8cc7\u6599\u9032\u884c\u9a57\u8b49\u3002 
\u5728\u76f8\u540c\u7684\u9a57\u8b49\u96c6\u8a08\u7b97\u6a21\u578b\u7684\u8aa4\u5dee\uff0c\u7576\u627e\u5230\u4e86\u6700\u4f73\u7684\u8d85\u53c3\u6578\u3002\u9019\u53ef\u80fd\u6703\u5c0e\u81f4\u91cd\u5927\u504f\u5dee\uff0c\u6709\u904e\u64ec\u5408\u64ec\u5408\u7591\u616e\u3002","title":"K-fold Cross-Validation"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb/#leave-one-out","text":"\u6b64\u65b9\u6cd5\u662f K-fold \u5176\u4e2d\u4e00\u7a2e\u7279\u4f8b\uff0c\u7576 K \u7b49\u65bc\u8cc7\u6599\u96c6\u7684\u6578\u91cf\u6642\u5c31\u7b49\u65bc Leave One Out \u65b9\u6cd5\u3002\u4e5f\u5c31\u662f\u5728\u6bcf\u6b21\u8a13\u7df4\u6642\u50c5\u6703\u628a\u4e00\u7b46\u8cc7\u6599\u7576\u6210\u6e2c\u8a66\u8cc7\u6599\uff0c\u5176\u9918\u7684 N-1 \u7b46\u8cc7\u6599\u4f5c\u70ba\u8a13\u7df4\u6a21\u578b\u7684\u8cc7\u6599\u3002\u6b64\u4f5c\u6cd5\u76f8\u7576\u7c21\u55ae\u660e\u77ad\uff0c\u4f46\u662f\u8a13\u7df4\u8ca0\u64d4\u6703\u975e\u5e38\u91cd\u4e14\u8017\u6642\u3002\u7136\u800c Leave p-out \u662f\u53e6\u4e00\u7a2e\u6280\u5de7\uff0c\u5176\u4e2d\u7684 p \u4f7f\u7528\u8005\u53ef\u4ee5\u81ea\u5df1\u8a2d\u5b9a\u6bcf\u6b21\u8a13\u7df4\u9700\u8981\u7559\u5e7e\u7b46\u8cc7\u6599\u4f5c\u70ba\u6e2c\u8a66\u96c6\u3002 \u512a\u9ede : \u7c21\u55ae\u4e14\u5bb9\u6613\u7406\u89e3\uff0c\u597d\u5be6\u4f5c\u3002 \u7f3a\u9ede : \u9700\u8981\u82b1\u8cbb\u66f4\u591a\u7684\u8a13\u7df4\u6642\u9593\u3002","title":"Leave One Out"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb/#random-subsampling","text":"Random Subsampling 
\u65b9\u6cd5\u662f\u4e00\u7a2e\u7c21\u55ae\u4e14\u5e38\u7528\u7684\u4ea4\u53c9\u9a57\u8b49\u6280\u8853\uff0c\u5b83\u900f\u904e\u591a\u6b21\u96a8\u6a5f\u62bd\u6a23\u5c07\u8cc7\u6599\u96c6\u5207\u5272\u6210\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u3002\u6bcf\u6b21\u96a8\u6a5f\u5206\u5272\u6642\uff0c\u6e2c\u8a66\u96c6\u7684\u6bd4\u4f8b\u56fa\u5b9a\uff0c\u800c\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u5247\u96a8\u6a5f\u9078\u53d6\u3002\u9019\u7a2e\u65b9\u6cd5\u4e3b\u8981\u900f\u904e\u591a\u6b21\u96a8\u6a5f\u6e2c\u8a66\u4e0d\u540c\u7684\u8cc7\u6599\u5207\u5206\u65b9\u5f0f\u4f86\u8a55\u4f30\u6a21\u578b\u6027\u80fd\uff0c\u6700\u5f8c\u53d6\u6e2c\u8a66\u7d50\u679c\u7684\u5e73\u5747\u503c\u3002 \u512a\u9ede : \u591a\u6b21\u96a8\u6a5f\u62bd\u6a23\u80fd\u6e1b\u5c11\u8cc7\u6599\u5283\u5206\u7684\u504f\u5dee\u3002 \u53ef\u4ee5\u9748\u6d3b\u9078\u64c7\u8a13\u7df4\u548c\u6e2c\u8a66\u96c6\u7684\u6bd4\u4f8b\u3002 \u7f3a\u9ede : \u591a\u6b21\u91cd\u8907\u62bd\u6a23\u9700\u8981\u8f03\u5927\u7684\u8a08\u7b97\u8cc7\u6e90\u3002 \u6bcf\u6b21\u96a8\u6a5f\u62bd\u6a23\u7684\u8cc7\u6599\u96c6\u53ef\u80fd\u6703\u6709\u6240\u4e0d\u540c\uff0c\u7d50\u679c\u4e0d\u7a69\u5b9a\u3002","title":"Random Subsampling"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb/#bootstrapping","text":"\u9084\u6709\u4e00\u7a2e\u6bd4\u8f03\u7279\u6b8a\u7684\u4ea4\u53c9\u9a57\u8b49\u65b9\u5f0f\uff0cBootstrapping 
\u81ea\u52a9\u62bd\u6a23\u6cd5\u3002\u662f\u4e00\u7a2e\u5f9e\u7d66\u5b9a\u8a13\u7df4\u96c6\u4e2d\u6709\u653e\u56de\u7684\u5747\u52fb\u62bd\u6a23\uff0c\u4e5f\u5c31\u662f\u8aaa\uff0c\u6bcf\u7576\u9078\u4e2d\u4e00\u500b\u6a23\u672c\uff0c\u5b83\u7b49\u53ef\u80fd\u5730\u88ab\u518d\u6b21\u9078\u4e2d\u4e26\u88ab\u518d\u6b21\u6dfb\u52a0\u5230\u8a13\u7df4\u96c6\u4e2d\u3002\u5047\u8a2d\u6bcf\u6b21\u8a13\u7df4\u90fd\u63a1\u6a23\u5341\u500b\u6a23\u672c\uff0c\u5728\u9019\u5341\u7b46\u8cc7\u6599\u4e2d\u5f88\u6709\u53ef\u80fd\u6703\u518d\u6b21\u88ab\u96a8\u6a5f\u62bd\u5230\u3002\u5269\u4e0b\u6c92\u6709\u62bd\u5230\u7684\u8cc7\u6599\u5247\u90fd\u8b8a\u6210\u6e2c\u8a66\u96c6\uff0c\u7528\u4f86\u8a55\u4f30\u8a13\u7df4\u5b8c\u7684\u6a21\u578b\u3002 \u512a\u9ede : \u80fd\u5728\u5c0f\u6578\u64da\u96c6\u7684\u60c5\u6cc1\u4e0b\u63d0\u9ad8\u6a21\u578b\u7a69\u5b9a\u6027\u3002 \u53ef\u91cd\u8907\u5229\u7528\u76f8\u540c\u7684\u6578\u64da\u4f86\u9032\u884c\u591a\u6b21\u8a13\u7df4\u3002 \u7f3a\u9ede : \u91cd\u8907\u6a23\u672c\u53ef\u80fd\u5c0e\u81f4\u6a21\u578b\u904e\u64ec\u5408\u3002 \u6e2c\u8a66\u96c6\u4e2d\u8cc7\u6599\u91cf\u8f03\u5c11\uff0c\u53ef\u80fd\u5c0e\u81f4\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u8a55\u4f30\u4e0d\u5920\u5145\u5206\u3002","title":"Bootstrapping"},{"location":"25.\u4ea4\u53c9\u9a57\u8b49 Cross-Validation 
\u7c21\u4ecb/#_4","text":"\u4ea4\u53c9\u9a57\u8b49\u662f\u8a13\u7df4\u6a21\u578b\u4e2d\u975e\u5e38\u91cd\u8981\u7684\u6280\u5de7\uff0c\u5c24\u5176\u662f\u7576\u624b\u908a\u7684\u8cc7\u6599\u96c6\u6709\u9650\u6642\u66f4\u61c9\u8a72\u4f7f\u7528\u3002\u900f\u904e\u4ea4\u53c9\u9a57\u8b49\u6280\u5de7\uff0c\u5373\u4f7f\u5728\u6578\u64da\u6709\u9650\u7684\u60c5\u6cc1\u4e0b\uff0c\u6211\u5011\u4e5f\u80fd\u5920\u7372\u5f97\u6e96\u78ba\u7684\u7d50\u679c\uff0c\u4e26\u4e14\u53ef\u4ee5\u907f\u514d\u6a21\u578b\u904e\u5ea6\u64ec\u5408\u3002\u4e26\u70ba\u6211\u5011\u63d0\u4f9b\u66f4\u6e96\u78ba\u7684\u6a21\u578b\u9810\u6e2c\u6027\u80fd\u4f30\u8a08\u65b9\u5f0f\uff0c\u540c\u6642\u4e5f\u80fd\u5920\u63d0\u5347\u6a21\u578b\u7684\u6cdb\u5316\u80fd\u529b\u3002\u4ee5\u4e0a\u7684\u65b9\u6cd5\u53ef\u4ee5\u76f4\u63a5\u4f7f\u7528 scikit-learn \u88e1\u9762 model_selection \u5e95\u4e0b\u7684 cross_val_score \u65b9\u6cd5\u9032\u884c\u5be6\u4f5c\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u5c0f\u7d50"},{"location":"26.\u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation/","text":"[Day 26] \u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u4e86\u89e3 K-Fold \u5404\u7a2e\u4e0d\u540c\u8b8a\u5f62 K-Fold Cross Validation Nested K-Fold Cross Validation Repeated K-Fold Cross Validation Stratified K-Fold Cross Validation Group K-Fold Cross Validation \u524d\u8a00 
\u4ea4\u53c9\u9a57\u8b49\u53c8\u7a31\u70ba\u6a23\u672c\u5916\u6e2c\u8a66\uff0c\u662f\u8cc7\u6599\u79d1\u5b78\u4e2d\u91cd\u8981\u7684\u4e00\u74b0\u3002\u900f\u904e\u8cc7\u6599\u9593\u7684\u91cd\u8907\u63a1\u6a23\u904e\u7a0b\uff0c\u7528\u65bc\u8a55\u4f30\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u4e26\u9a57\u8b49\u6a21\u578b\u5c0d\u7368\u7acb\u6e2c\u8a66\u6578\u64da\u96c6\u7684\u6cdb\u5316\u80fd\u529b\u3002\u5728\u4eca\u5929\u7684\u6587\u7ae0\u4e2d\u6211\u5011\u5c07\u8a73\u7d30\u7684\u4f86\u4ecb\u7d39\u6bcf\u4e00\u7a2e K-Fold \u8b8a\u578b\u3002 K-Fold Cross Validation \u5728 K-Fold \u7684\u65b9\u6cd5\u4e2d\u6211\u5011\u6703\u5c07\u8cc7\u6599\u5207\u5206\u70ba K \u7b49\u4efd\uff0cK \u662f\u7531\u6211\u5011\u81ea\u7531\u8abf\u63a7\u7684\uff0c\u4ee5\u4e0b\u5716\u70ba\u4f8b\uff1a\u5047\u8a2d\u6211\u5011\u8a2d\u5b9a K=10\uff0c\u4e5f\u5c31\u662f\u5c07\u8a13\u7df4\u96c6\u5207\u5272\u70ba\u5341\u7b49\u4efd\u3002\u9019\u610f\u5473\u8457\u76f8\u540c\u7684\u6a21\u578b\u8981\u8a13\u7df4\u5341\u6b21\uff0c\u6bcf\u4e00\u6b21\u7684\u8a13\u7df4\u90fd\u6703\u5f9e\u9019\u5341\u7b49\u4efd\u6311\u9078\u5176\u4e2d\u4e5d\u7b49\u4efd\u4f5c\u70ba\u8a13\u7df4\u8cc7\u6599\uff0c\u5269\u4e0b\u4e00\u7b49\u4efd\u672a\u53c3\u8207\u8a13\u7df4\u4e26\u4f5c\u70ba\u9a57\u8b49\u96c6\u3002\u56e0\u6b64\u8a13\u7df4\u5341\u56de\u5c07\u6703\u6709\u5341\u500b\u4e0d\u540c\u9a57\u8b49\u96c6\u7684 Error\uff0c\u9019\u500b Error \u901a\u5e38\u6211\u5011\u6703\u7a31\u4f5c loss \u4e5f\u5c31\u662f\u6a21\u578b\u8a55\u4f30\u65b9\u5f0f\u3002\u6a21\u578b\u8a55\u4f30\u65b9\u5f0f\u6709\u5f88\u591a\u7a2e\uff0c\u4ee5\u56de\u6b78\u554f\u984c\u4f86\u8aaa\u5c31\u6709 MSE\u3001MAE\u3001RMSE...\u7b49\u3002\u6700\u7d42\u628a\u9019\u5341\u6b21\u7684 loss 
\u52a0\u7e3d\u8d77\u4f86\u53d6\u5e73\u5747\u5c31\u53ef\u4ee5\u7576\u6210\u6700\u7d42\u7d50\u679c\u3002\u900f\u904e\u9019\u7a2e\u65b9\u5f0f\uff0c\u4e0d\u540c\u5206\u7d44\u8a13\u7df4\u7684\u7d50\u679c\u9032\u884c\u5e73\u5747\u4f86\u6e1b\u5c11\u65b9\u5dee\uff0c\u56e0\u6b64\u6a21\u578b\u7684\u6027\u80fd\u5c0d\u6578\u64da\u7684\u5283\u5206\u5c31\u4e0d\u6703\u90a3\u9ebc\u654f\u611f\u3002 \u53c3\u8003 [scikit-learn] K-Fold Nested K-Fold Cross Validation \u6b64\u65b9\u6cd5\u70ba K-Fold \u7684\u8b8a\u578b\uff0cNested \u610f\u6307\u96d9\u8ff4\u5708(\u5de2\u72c0)\u7684\u610f\u601d\u3002\u5206\u5225\u6709\u5916\u5c64\u8ff4\u5708(Outer Loop)\u70ba\u4e00\u822c\u6b63\u5e38\u7684 K-Fold\u3002\u552f\u4e00\u4e0d\u540c\u7684\u662f\u6211\u5011\u5728\u6bcf\u4e00\u6b21\u8fed\u4ee3\u4e2d\u6703\u5c07\u5916\u5c64 K-Fold \u7684\u8a13\u7df4\u96c6\u62ff\u51fa\u4f86\u518d\u9032\u5165\u5230\u5167\u5c64\u8ff4\u5708(Inner Loop)\u518d\u505a\u4e00\u6b21 K-Fold\u3002\u7531\u4e0b\u5716\u53ef\u4ee5\u770b\u5230\uff0c(1)\u6211\u5011\u53ef\u4ee5\u5728\u7b2c\u4e00\u500b\u5916\u5c64\u56de\u5708\u4e2d\u5c07\u8a13\u7df4\u8cc7\u6599\u53c8\u5207\u70ba\u4e94\u4efd\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\uff0c\u5167\u5c64\u5708\u900f\u904e Grid Search \u7b49\u6f14\u7b97\u6cd5\u4f86\u5c0b\u627e\u6700\u4f73\u8d85\u53c3\u6578\u3002\u7b49\u627e\u5230\u6700\u597d\u7684\u6a21\u578b\u8d85\u53c3\u6578\u5f8c\uff0c\u6211\u5011\u518d\u62ff(2)\u5916\u5c64\u56de\u5708\u7684\u6e2c\u8a66\u8cc7\u6599\u9032\u884c\u6a21\u578b\u8a55\u4f30\u4e26\u8a08\u7b97 loss\u3002\u6700\u7d42\u6211\u5011\u6703\u5f97\u5230\u4e94\u500b\u6e2c\u8a66\u96c6 loss \u7684\u5e73\u5747\u4f5c\u70ba\u4ea4\u53c9\u9a57\u8b49\u6a21\u578b\u8a55\u4f30\u7d50\u679c\u3002 [scikit-learn] Nested K-Fold Repeated K-Fold Cross Validation \u53e6\u4e00\u500b K-Fold \u8b8a\u578b\u70ba Repeated K-Fold \u9867\u540d\u601d\u7fa9\u5c31\u662f\u91cd\u8907 n \u6b21 K-Fold cross-validation\u3002\u5047\u8a2d K=2\u3001n=2 \u4ee3\u8868 2-fold cross 
validation\uff0c\u5728\u6bcf\u4e00\u56de\u5408\u53c8\u6703\u5c07\u8cc7\u6599\u6253\u4e82\u5f97\u5230\u65b0\u7d44\u5408\u3002\u56e0\u6b64\u6700\u7d42\u6703\u5f97\u5230 4 \u7d44\u7684\u8cc7\u6599\uff0c\u610f\u5473\u8457\u6a21\u578b\u5c07\u8a13\u7df4\u56db\u904d\u3002\u6b64\u7a2e\u65b9\u6cd5\u6703\u78ba\u4fdd\u6bcf\u6b21\u7d44\u5408\u7684\u96a8\u6a5f\u8cc7\u6599\u4e26\u4e0d\u6703\u91cd\u8907\u3002\u7c21\u55ae\u4f86\u8aaa\u57f7\u884c K-Fold \u4ea4\u53c9\u9a57\u8b49\uff0c\u7136\u5f8c\u91cd\u65b0\u6d17\u724c\u6578\u64da\uff0c\u7136\u5f8c\u518d\u6b21\u57f7\u884c K-Fold\u3002 [scikit-learn] RepeatedKFold Stratified K-Fold Cross Validation \u5206\u5c64\u4ea4\u53c9\u9a57\u8b49\uff0c\u6bcf\u500b Fold \u90fd\u662f\u6309\u7167\u985e\u5225\u7684\u6bd4\u4f8b\u62bd\u51fa\u4f86\u7684\u3002\u5047\u8a2d\u9019\u500b\u5206\u985e\u4efb\u52d9\u4e00\u5171\u6709\u4e09\u500b\u985e\u5225A\u3001B\u3001C\uff0c\u5b83\u5011\u7684\u6bd4\u4f8b\u662f1:4:8\u3002\u90a3\u9ebc\u6bcf\u500bfold\u4e2d\u7684A\u3001B\u3001C\u7684\u6bd4\u4f8b\u4e5f\u5fc5\u9808\u662f1:4:8\u3002\u5176\u5be6\u73fe\u65b9\u5f0f\u4e5f\u975e\u5e38\u7c21\u55ae\uff0c\u9996\u5148\u4f9d\u5e8f\u628aA\u3001B\u3001C\u985e\u5225\u7684\u6578\u64da\u96a8\u6a5f\u5206\u6210k\u7d44\uff0c\u6700\u5f8c\u518d\u628a\u5b83\u5011\u4f9d\u7167\u6bd4\u4f8b\u5408\u4f75\u8d77\u4f86\uff0c\u5c31\u5f97\u5230\u4e86k\u7d44\u6eff\u8db31:4:8\u7684\u6578\u64da\u4e86\u3002 \u512a\u9ede : \u512a\u65bc\u4e00\u822c\u7684 K-Fold \u56e0\u70batest set\u80fd\u5145\u5206\u4ee3\u8868\u6574\u9ad4\u6578\u64da\u3002 \u9810\u6e2c\u7d50\u679c\u7684\u65b9\u5dee\u4e5f\u6703\u8b8a\u5c0f\uff0c\u4f7f\u5f97\u4ea4\u53c9\u9a57\u8b49\u7684 error \u66f4\u53ef\u9760\u3002 \u5c0d\u65bc\u8cc7\u6599\u4e0d\u5e73\u8861\u7684\u6578\u64da\u5f88\u6709\u7528 \u7f3a\u9ede : \u5927\u591a\u5be6\u4f8b\u90fd\u4ee5\u5206\u985e\u554f\u984c\u70ba\u4e3b [scikit-learn] StratifiedKFold [scikit-learn] StratifiedShuffleSplit Group K-Fold Cross Validation 
\u6b64\u505a\u6cd5\u70ba\u4e86\u907f\u514d\u53d6\u9023\u7e8c\u7684\u8cc7\u6599\u800c\u9020\u6210\u6e2c\u8a66\u96c6\u6216\u9a57\u8b49\u96c6\u504f\u5411\u67d0\u4e00\u7279\u5225\u7684\u72c0\u6cc1\u800c\u9020\u6210\u904e\u5ea6\u64ec\u5408\u8a13\u7df4\u96c6\uff0c\u53cd\u800c\u5728\u672a\u770b\u904e\u7684\u8cc7\u6599\u4e0b\u8868\u73fe\u4e0d\u597d\u3002Group K-Fold \u70ba\u4e86\u907f\u514d\u6b64\u60c5\u6cc1\u767c\u751f\uff0c\u5b83\u5207\u5272\u8cc7\u6599\u6642\u6709\u6548\u7684\u5f9e\u8cc7\u6599\u96c6\u4e2d\u6bcf\u500b\u5340\u584a\u96a8\u6a5f\u6311\u9078\u4f5c\u70ba\u9a57\u8b49\u96c6\u3002\u540c\u6642\u4fdd\u8b49\u6bcf\u4e00\u500b Fold \u7684\u9a57\u8b49\u96c6\u4e26\u4e0d\u6703\u6709\u91cd\u8907\u7684\u8cc7\u6599\u3002\u5047\u8a2d\u4f60\u6709\u4e09\u500b\u985e\u5225\uff0c\u81f3\u5c11\u9a57\u8b49\u96c6\u5fc5\u9808\u5f9e\u4e09\u500b\u4e0d\u540c\u7684\u5206\u7d44\u4e2d\u62bd\u6a23\u53d6\u51fa\uff0c\u540c\u6642\u78ba\u4fdd\u6bcf\u4e00\u500b Fold \u6240\u62bd\u51fa\u4f86\u7684\u9019\u4e09\u500b\u5206\u7d44\u4e26\u4e0d\u6703\u91cd\u8907\u3002 [scikit-learn] GroupKFold \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 26] \u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation"},{"location":"26.\u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation/#day-26-k-fold-cross-validation","text":"","title":"[Day 26] \u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation"},{"location":"26.\u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation/#_1","text":"\u4e86\u89e3 K-Fold \u5404\u7a2e\u4e0d\u540c\u8b8a\u5f62 K-Fold Cross Validation Nested K-Fold Cross Validation Repeated K-Fold Cross Validation Stratified K-Fold Cross Validation Group K-Fold Cross Validation","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"26.\u4ea4\u53c9\u9a57\u8b49 K-Fold 
Cross-Validation/#_2","text":"\u4ea4\u53c9\u9a57\u8b49\u53c8\u7a31\u70ba\u6a23\u672c\u5916\u6e2c\u8a66\uff0c\u662f\u8cc7\u6599\u79d1\u5b78\u4e2d\u91cd\u8981\u7684\u4e00\u74b0\u3002\u900f\u904e\u8cc7\u6599\u9593\u7684\u91cd\u8907\u63a1\u6a23\u904e\u7a0b\uff0c\u7528\u65bc\u8a55\u4f30\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u4e26\u9a57\u8b49\u6a21\u578b\u5c0d\u7368\u7acb\u6e2c\u8a66\u6578\u64da\u96c6\u7684\u6cdb\u5316\u80fd\u529b\u3002\u5728\u4eca\u5929\u7684\u6587\u7ae0\u4e2d\u6211\u5011\u5c07\u8a73\u7d30\u7684\u4f86\u4ecb\u7d39\u6bcf\u4e00\u7a2e K-Fold \u8b8a\u578b\u3002","title":"\u524d\u8a00"},{"location":"26.\u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation/#k-fold-cross-validation","text":"\u5728 K-Fold \u7684\u65b9\u6cd5\u4e2d\u6211\u5011\u6703\u5c07\u8cc7\u6599\u5207\u5206\u70ba K \u7b49\u4efd\uff0cK \u662f\u7531\u6211\u5011\u81ea\u7531\u8abf\u63a7\u7684\uff0c\u4ee5\u4e0b\u5716\u70ba\u4f8b\uff1a\u5047\u8a2d\u6211\u5011\u8a2d\u5b9a K=10\uff0c\u4e5f\u5c31\u662f\u5c07\u8a13\u7df4\u96c6\u5207\u5272\u70ba\u5341\u7b49\u4efd\u3002\u9019\u610f\u5473\u8457\u76f8\u540c\u7684\u6a21\u578b\u8981\u8a13\u7df4\u5341\u6b21\uff0c\u6bcf\u4e00\u6b21\u7684\u8a13\u7df4\u90fd\u6703\u5f9e\u9019\u5341\u7b49\u4efd\u6311\u9078\u5176\u4e2d\u4e5d\u7b49\u4efd\u4f5c\u70ba\u8a13\u7df4\u8cc7\u6599\uff0c\u5269\u4e0b\u4e00\u7b49\u4efd\u672a\u53c3\u8207\u8a13\u7df4\u4e26\u4f5c\u70ba\u9a57\u8b49\u96c6\u3002\u56e0\u6b64\u8a13\u7df4\u5341\u56de\u5c07\u6703\u6709\u5341\u500b\u4e0d\u540c\u9a57\u8b49\u96c6\u7684 Error\uff0c\u9019\u500b Error \u901a\u5e38\u6211\u5011\u6703\u7a31\u4f5c loss \u4e5f\u5c31\u662f\u6a21\u578b\u8a55\u4f30\u65b9\u5f0f\u3002\u6a21\u578b\u8a55\u4f30\u65b9\u5f0f\u6709\u5f88\u591a\u7a2e\uff0c\u4ee5\u56de\u6b78\u554f\u984c\u4f86\u8aaa\u5c31\u6709 MSE\u3001MAE\u3001RMSE...\u7b49\u3002\u6700\u7d42\u628a\u9019\u5341\u6b21\u7684 loss 
\u52a0\u7e3d\u8d77\u4f86\u53d6\u5e73\u5747\u5c31\u53ef\u4ee5\u7576\u6210\u6700\u7d42\u7d50\u679c\u3002\u900f\u904e\u9019\u7a2e\u65b9\u5f0f\uff0c\u4e0d\u540c\u5206\u7d44\u8a13\u7df4\u7684\u7d50\u679c\u9032\u884c\u5e73\u5747\u4f86\u6e1b\u5c11\u65b9\u5dee\uff0c\u56e0\u6b64\u6a21\u578b\u7684\u6027\u80fd\u5c0d\u6578\u64da\u7684\u5283\u5206\u5c31\u4e0d\u6703\u90a3\u9ebc\u654f\u611f\u3002 \u53c3\u8003 [scikit-learn] K-Fold","title":"K-Fold Cross Validation"},{"location":"26.\u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation/#nested-k-fold-cross-validation","text":"\u6b64\u65b9\u6cd5\u70ba K-Fold \u7684\u8b8a\u578b\uff0cNested \u610f\u6307\u96d9\u8ff4\u5708(\u5de2\u72c0)\u7684\u610f\u601d\u3002\u5206\u5225\u6709\u5916\u5c64\u8ff4\u5708(Outer Loop)\u70ba\u4e00\u822c\u6b63\u5e38\u7684 K-Fold\u3002\u552f\u4e00\u4e0d\u540c\u7684\u662f\u6211\u5011\u5728\u6bcf\u4e00\u6b21\u8fed\u4ee3\u4e2d\u6703\u5c07\u5916\u5c64 K-Fold \u7684\u8a13\u7df4\u96c6\u62ff\u51fa\u4f86\u518d\u9032\u5165\u5230\u5167\u5c64\u8ff4\u5708(Inner Loop)\u518d\u505a\u4e00\u6b21 K-Fold\u3002\u7531\u4e0b\u5716\u53ef\u4ee5\u770b\u5230\uff0c(1)\u6211\u5011\u53ef\u4ee5\u5728\u7b2c\u4e00\u500b\u5916\u5c64\u56de\u5708\u4e2d\u5c07\u8a13\u7df4\u8cc7\u6599\u53c8\u5207\u70ba\u4e94\u4efd\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\uff0c\u5167\u5c64\u5708\u900f\u904e Grid Search \u7b49\u6f14\u7b97\u6cd5\u4f86\u5c0b\u627e\u6700\u4f73\u8d85\u53c3\u6578\u3002\u7b49\u627e\u5230\u6700\u597d\u7684\u6a21\u578b\u8d85\u53c3\u6578\u5f8c\uff0c\u6211\u5011\u518d\u62ff(2)\u5916\u5c64\u56de\u5708\u7684\u6e2c\u8a66\u8cc7\u6599\u9032\u884c\u6a21\u578b\u8a55\u4f30\u4e26\u8a08\u7b97 loss\u3002\u6700\u7d42\u6211\u5011\u6703\u5f97\u5230\u4e94\u500b\u6e2c\u8a66\u96c6 loss \u7684\u5e73\u5747\u4f5c\u70ba\u4ea4\u53c9\u9a57\u8b49\u6a21\u578b\u8a55\u4f30\u7d50\u679c\u3002 [scikit-learn] Nested K-Fold","title":"Nested K-Fold Cross Validation"},{"location":"26.\u4ea4\u53c9\u9a57\u8b49 K-Fold 
Cross-Validation/#repeated-k-fold-cross-validation","text":"\u53e6\u4e00\u500b K-Fold \u8b8a\u578b\u70ba Repeated K-Fold \u9867\u540d\u601d\u7fa9\u5c31\u662f\u91cd\u8907 n \u6b21 K-Fold cross-validation\u3002\u5047\u8a2d K=2\u3001n=2 \u4ee3\u8868 2-fold cross validation\uff0c\u5728\u6bcf\u4e00\u56de\u5408\u53c8\u6703\u5c07\u8cc7\u6599\u6253\u4e82\u5f97\u5230\u65b0\u7d44\u5408\u3002\u56e0\u6b64\u6700\u7d42\u6703\u5f97\u5230 4 \u7d44\u7684\u8cc7\u6599\uff0c\u610f\u5473\u8457\u6a21\u578b\u5c07\u8a13\u7df4\u56db\u904d\u3002\u6b64\u7a2e\u65b9\u6cd5\u6703\u78ba\u4fdd\u6bcf\u6b21\u7d44\u5408\u7684\u96a8\u6a5f\u8cc7\u6599\u4e26\u4e0d\u6703\u91cd\u8907\u3002\u7c21\u55ae\u4f86\u8aaa\u57f7\u884c K-Fold \u4ea4\u53c9\u9a57\u8b49\uff0c\u7136\u5f8c\u91cd\u65b0\u6d17\u724c\u6578\u64da\uff0c\u7136\u5f8c\u518d\u6b21\u57f7\u884c K-Fold\u3002 [scikit-learn] RepeatedKFold","title":"Repeated K-Fold Cross Validation"},{"location":"26.\u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation/#stratified-k-fold-cross-validation","text":"\u5206\u5c64\u4ea4\u53c9\u9a57\u8b49\uff0c\u6bcf\u500b Fold \u90fd\u662f\u6309\u7167\u985e\u5225\u7684\u6bd4\u4f8b\u62bd\u51fa\u4f86\u7684\u3002\u5047\u8a2d\u9019\u500b\u5206\u985e\u4efb\u52d9\u4e00\u5171\u6709\u4e09\u500b\u985e\u5225A\u3001B\u3001C\uff0c\u5b83\u5011\u7684\u6bd4\u4f8b\u662f1:4:8\u3002\u90a3\u9ebc\u6bcf\u500bfold\u4e2d\u7684A\u3001B\u3001C\u7684\u6bd4\u4f8b\u4e5f\u5fc5\u9808\u662f1:4:8\u3002\u5176\u5be6\u73fe\u65b9\u5f0f\u4e5f\u975e\u5e38\u7c21\u55ae\uff0c\u9996\u5148\u4f9d\u5e8f\u628aA\u3001B\u3001C\u985e\u5225\u7684\u6578\u64da\u96a8\u6a5f\u5206\u6210k\u7d44\uff0c\u6700\u5f8c\u518d\u628a\u5b83\u5011\u4f9d\u7167\u6bd4\u4f8b\u5408\u4f75\u8d77\u4f86\uff0c\u5c31\u5f97\u5230\u4e86k\u7d44\u6eff\u8db31:4:8\u7684\u6578\u64da\u4e86\u3002 \u512a\u9ede : \u512a\u65bc\u4e00\u822c\u7684 K-Fold \u56e0\u70batest set\u80fd\u5145\u5206\u4ee3\u8868\u6574\u9ad4\u6578\u64da\u3002 
\u9810\u6e2c\u7d50\u679c\u7684\u65b9\u5dee\u4e5f\u6703\u8b8a\u5c0f\uff0c\u4f7f\u5f97\u4ea4\u53c9\u9a57\u8b49\u7684 error \u66f4\u53ef\u9760\u3002 \u5c0d\u65bc\u8cc7\u6599\u4e0d\u5e73\u8861\u7684\u6578\u64da\u5f88\u6709\u7528 \u7f3a\u9ede : \u5927\u591a\u5be6\u4f8b\u90fd\u4ee5\u5206\u985e\u554f\u984c\u70ba\u4e3b [scikit-learn] StratifiedKFold [scikit-learn] StratifiedShuffleSplit","title":"Stratified K-Fold Cross Validation"},{"location":"26.\u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation/#group-k-fold-cross-validation","text":"\u6b64\u505a\u6cd5\u70ba\u4e86\u907f\u514d\u53d6\u9023\u7e8c\u7684\u8cc7\u6599\u800c\u9020\u6210\u6e2c\u8a66\u96c6\u6216\u9a57\u8b49\u96c6\u504f\u5411\u67d0\u4e00\u7279\u5225\u7684\u72c0\u6cc1\u800c\u9020\u6210\u904e\u5ea6\u64ec\u5408\u8a13\u7df4\u96c6\uff0c\u53cd\u800c\u5728\u672a\u770b\u904e\u7684\u8cc7\u6599\u4e0b\u8868\u73fe\u4e0d\u597d\u3002Group K-Fold \u70ba\u4e86\u907f\u514d\u6b64\u60c5\u6cc1\u767c\u751f\uff0c\u5b83\u5207\u5272\u8cc7\u6599\u6642\u6709\u6548\u7684\u5f9e\u8cc7\u6599\u96c6\u4e2d\u6bcf\u500b\u5340\u584a\u96a8\u6a5f\u6311\u9078\u4f5c\u70ba\u9a57\u8b49\u96c6\u3002\u540c\u6642\u4fdd\u8b49\u6bcf\u4e00\u500b Fold \u7684\u9a57\u8b49\u96c6\u4e26\u4e0d\u6703\u6709\u91cd\u8907\u7684\u8cc7\u6599\u3002\u5047\u8a2d\u4f60\u6709\u4e09\u500b\u985e\u5225\uff0c\u81f3\u5c11\u9a57\u8b49\u96c6\u5fc5\u9808\u5f9e\u4e09\u500b\u4e0d\u540c\u7684\u5206\u7d44\u4e2d\u62bd\u6a23\u53d6\u51fa\uff0c\u540c\u6642\u78ba\u4fdd\u6bcf\u4e00\u500b Fold \u6240\u62bd\u51fa\u4f86\u7684\u9019\u4e09\u500b\u5206\u7d44\u4e26\u4e0d\u6703\u91cd\u8907\u3002 [scikit-learn] GroupKFold \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Group K-Fold Cross Validation"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/","text":"[Day 27] \u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 
\u63a2\u8a0e\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u7684\u5341\u500b\u932f\u8aa4 \u524d\u8a00 \u4eba\u5de5\u667a\u6167\u8fd1\u5e74\u4f86\u6210\u70ba\u4efb\u4f55\u7522\u696d\u71b1\u9580\u7684\u8a71\u984c\u4e4b\u4e00\uff0c\u5404\u516c\u53f8\u7a4d\u6975\u5730\u5c0e\u5165\u6a5f\u5668\u5b78\u7fd2\u6280\u8853\u5354\u52a9\u7522\u696d AI \u5316\u3002\u4f8b\u5982\uff1a\u667a\u6167\u91ab\u7642\u3001\u667a\u6167\u4ea4\u901a\u3001\u667a\u6167\u88fd\u9020......\u7b49\u3002\u6b63\u662f\u56e0\u70ba AI \u6280\u8853\u7684\u5275\u65b0\u8207\u666e\u53ca\uff0c\u8a13\u7df4\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u518d\u4e5f\u4e0d\u662f\u7406\u5de5\u80cc\u666f\u7684\u4eba\u624d\u80fd\u505a\u7684\u4e8b\u3002\u6b64\u5916\u96a8\u8457 Python \u958b\u767c\u793e\u7fa4\u8301\u58ef\uff0c\u8a31\u591a\u958b\u6e90\u7684 AI \u5957\u4ef6\u5982\u96e8\u5f8c\u6625\u7b4d\u822c\u7684\u51fa\u73fe\u5927\u5927\u964d\u4f4e\u4e86\u6a5f\u5668\u5b78\u7fd2\u5efa\u6a21\u7684\u9580\u6abb\u3002\u5728\u4eca\u5929\u7684\u5167\u5bb9\u4e2d\u6211\u60f3\u85c9\u7531\u9435\u4eba\u8cfd\u4f86\u8ddf\u5927\u5bb6\u5206\u4eab\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b\uff0c\u4e26\u4e14\u5f9e\u8cc7\u6599\u9762\u8207\u6a21\u578b\u9762\u7684\u89d2\u5ea6\u4f86\u63a2\u8a0e\u6a5f\u5668\u5b78\u7fd2\u61c9\u8a72\u6ce8\u610f\u7684\u5e7e\u4ef6\u4e8b\u3002\u5c24\u5176\u662f\u5728\u521d\u5b78\u968e\u6bb5\uff0c\u56e0\u7f3a\u4e4f\u7d93\u9a57\u5f80\u5f80\u6703\u72af\u4e00\u4e9b\u7121\u53ef\u907f\u514d\u7684\u932f\u8aa4\u3002\u6240\u4ee5\u9019\u7bc7\u6587\u7ae0\u5c07\u9ede\u51fa\u5341\u500b\u6a5f\u5668\u5b78\u7fd2\u4e2d\u5e38\u72af\u7684\u96b1\u5f62\u932f\u8aa4\u3002 \u8cc7\u6599\u9762 \u8cc7\u6599\u6536\u96c6\u8207\u8655\u7406\u4e0d\u7576 \u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u985e\u5225\u5206\u4f48\u4e0d\u4e00\u81f4 \u6c92\u6709\u8cc7\u6599\u8996\u89ba\u5316\u7684\u7fd2\u6163 \u4f7f\u7528 LabelEncoder \u70ba\u7279\u5fb5\u7de8\u78bc \u8cc7\u6599\u8655\u7406\u4e0d\u7576\u5c0e\u81f4\u8cc7\u6599\u6d29\u6f0f 
\u6a21\u578b\u9762 \u50c5\u4f7f\u7528\u6e2c\u8a66\u96c6\u8a55\u4f30\u6a21\u578b\u597d\u58de \u5728\u6c92\u6709\u4ea4\u53c9\u9a57\u8b49\u7684\u60c5\u6cc1\u4e0b\u5224\u65b7\u6a21\u578b\u6027\u80fd \u5206\u985e\u554f\u984c\u50c5\u4f7f\u7528\u6e96\u78ba\u7387\u4f5c\u70ba\u8861\u91cf\u6a21\u578b\u7684\u6307\u6a19 \u8ff4\u6b78\u554f\u984c\u50c5\u4f7f\u7528 R2 \u5206\u6578\u8a55\u4f30\u6a21\u578b\u597d\u58de \u4efb\u4f55\u4e8b\u60c5\u5225\u6025\u8457\u60f3\u7528 AI \u89e3\u6c7a 1. \u8cc7\u6599\u6536\u96c6\u8207\u8655\u7406\u4e0d\u7576 \u6a5f\u5668\u5b78\u7fd2\u9996\u8981\u7684\u6b65\u9a5f\u662f\u5b9a\u7fa9\u554f\u984c\uff0c\u7576\u78ba\u5b9a\u76ee\u6a19\u8207\u65b9\u5411\u5f8c\u5373\u53ef\u958b\u59cb\u641c\u96c6\u8cc7\u6599\u3002\u76f8\u4fe1\u5927\u5bb6\u90fd\u77e5\u9053\u73fe\u5be6\u751f\u6d3b\u4e2d\u7684\u8cc7\u6599\u5f97\u4f86\u4e0d\u6613\uff0c\u5373\u4f7f\u5f9e\u8cc7\u6599\u5eab\u53d6\u5f97\u4e86\u9019\u4e9b\u8cc7\u6599\u5f8c\u6211\u5011\u9084\u9700\u8981\u82b1\u5927\u91cf\u7684\u6642\u9593\u9032\u884c\u8cc7\u6599\u6e05\u6d17\u3002\u6240\u8b02\u7684\u8cc7\u6599\u6e05\u6d17\u662f\u8cc7\u6599\u5eab\u7576\u4e2d\u53ef\u80fd\u6703\u6709\u7f3a\u5931\u503c\uff0c\u4f8b\u5982\uff1aNA\u3001Inf\u3001NaN\u3001NULL\u3002 NA\uff1a\u8868\u793a\u7f3a\u5931\u503c\uff0c\u662f Not Available \u7684\u7e2e\u5beb\u3002 Inf\uff1a\u8868\u793a\u7121\u7aae\u5927\uff0c\u662f Infinite \u7684\u7e2e\u5beb\u3002 NaN\uff1a\u8868\u793a\u975e\u6578\u503c\uff0c\u662f Not a Number \u7684\u7e2e\u5beb\u3002 NULL\uff1a\u8868\u793a\u7a7a\u503c\uff0c\u5373\u6c92\u6709\u5167\u5bb9\u3002 
\u7576\u8cc7\u6599\u90fd\u5b8c\u6210\u4e86\u524d\u8655\u7406\u5f8c\uff0c\u5373\u53ef\u958b\u59cb\u5efa\u7acb\u6a21\u578b\u8207\u8a55\u4f30\u6a21\u578b\u3002\u4f46\u662f\u7576\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u8868\u73fe\u4e0d\u597d\u6709\u5f88\u591a\u7684\u56e0\u7d20\u3002\u5927\u5bb6\u6700\u5e38\u505a\u7684\u662f\u66ff\u63db\u6a21\u578b\u6f14\u7b97\u6cd5\uff0c\u6216\u662f\u5617\u8a66\u4e0d\u540c\u7684\u6a21\u578b\u8d85\u53c3\u6578\u53d6\u5f97\u4e00\u500b\u6700\u4f73\u7684\u7d50\u679c\u3002\u4f46\u662f\u5728\u9032\u884c\u9019\u4e9b\u505a\u6cd5\u4e4b\u524d\uff0c\u5efa\u8b70\u5927\u5bb6\u5148\u628a\u95dc\u6ce8\u7684\u9ede\u56de\u5230\u8cc7\u6599\u8655\u7406\u9762\u3002\u6a21\u578b\u8a13\u7df4\u4e0d\u597d\u7684\u5176\u4e2d\u4e00\u500b\u56e0\u7d20\u662f\u8cc7\u6599\u7684\u6a19\u7c64\u6536\u96c6\u4e0d\u7576\u3002Landing.ai \u57f7\u884c\u9577\u5433\u6069\u9054\u4e5f\u66fe\u7d93\u8aaa\u904e\u7576\u4e00\u500b\u5c0f\u8cc7\u6599\u96c6\u5b58\u5728\u8457\u932f\u8aa4\u6a19\u7c64\u6642\uff0c\u6a21\u578b\u5f88\u96e3\u7d66\u51fa\u4e00\u500b\u6b63\u78ba\u7684\u8f38\u51fa\u3002\u56e0\u70ba\u8cc7\u6599\u9593\u593e\u5e36\u4e86\u96dc\u8a0a\u5f80\u5f80\u6703\u4f7f\u5f97\u6a21\u578b\u5b58\u5728\u8457\u4e00\u4e9b\u504f\u5dee\uff0c\u5c0e\u81f4\u8a13\u7df4\u7d50\u679c\u4e0d\u7a69\u5b9a\u3002\u56e0\u6b64\u7b46\u8005\u5efa\u8b70\u6a21\u578b\u8a13\u7df4\u4e0d\u597d\u7684\u6642\u5019\uff0c\u53ef\u4ee5\u56de\u982d\u89c0\u5bdf\u8cc7\u6599\u662f\u5426\u5b58\u5728\u4e00\u4e9b\u932f\u8aa4\u3002\u800c\u4e0d\u662f\u4e00\u5473\u7684\u8abf\u6574\u6a21\u578b\u6f14\u7b97\u6cd5\u8207\u8d85\u53c3\u6578\u3002 2. 
\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u985e\u5225\u5206\u4f48\u4e0d\u4e00\u81f4 \u5728\u5206\u985e\u7684\u8cc7\u6599\u4e2d\uff0c\u521d\u5b78\u8005\u5e38\u898b\u7684\u932f\u8aa4\u662f\u5fd8\u8a18\u4f7f\u7528\u5206\u5c64\u62bd\u6a23 (stratify) \u4f86\u5c0d\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u9032\u884c\u5207\u5272\u3002\u7576\u6e2c\u8a66\u96c6\u7684\u5206\u4f48\u76e1\u53ef\u80fd\u8207\u8a13\u7df4\u76f8\u540c\u60c5\u6cc1\u4e0b\uff0c\u6a21\u578b\u624d\u66f4\u6709\u53ef\u80fd\u5f97\u5230\u66f4\u6e96\u78ba\u7684\u9810\u6e2c\u3002\u7136\u800c\u5728\u5206\u985e\u7684\u554f\u984c\u4e2d\uff0c\u6211\u5011\u5fc5\u9808\u66f4\u95dc\u6ce8\u6bcf\u500b\u985e\u5225\u7684\u8cc7\u6599\u5206\u4f48\u6bd4\u4f8b\u3002\u4ee5\u4e0b\u8209\u500b\u4f8b\u5b50\uff1a\u5047\u8a2d\u6211\u5011\u6709\u4e09\u500b\u6a19\u7c64\u7684\u985e\u5225\uff0c\u800c\u9019\u4e09\u500b\u985e\u5225\u7684\u5206\u4f48\u6bd4\u4f8b\u5206\u5225\u70ba 4:3:3\u3002\u540c\u7406\u6211\u5011\u5728\u9032\u884c\u8cc7\u6599\u5207\u5272\u7684\u6642\u5019\u5fc5\u9808\u78ba\u4fdd\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u9700\u8981\u6709\u76f8\u540c\u7684\u8cc7\u6599\u5206\u4f48\u6bd4\u4f8b\u3002 \u5927\u5bb6\u61c9\u8a72\u90fd\u4f7f\u7528\u904e Sklearn \u7684 train_test_split \u9032\u884c\u8cc7\u6599\u5207\u5272\u3002\u5728\u6b64\u65b9\u6cd5\u4e2d Sklearn \u63d0\u4f9b\u4e86\u4e00\u500b stratify 
\u53c3\u6578\u9054\u5230\u5206\u5c64\u96a8\u6a5f\u62bd\u6a23\u7684\u76ee\u7684\u3002\u7279\u5225\u662f\u5728\u539f\u59cb\u6578\u64da\u4e2d\u6a23\u672c\u6a19\u7c64\u5206\u4f48\u4e0d\u5747\u8861\u6642\u975e\u5e38\u6709\u7528\uff0c\u4e00\u4e9b\u5206\u985e\u554f\u984c\u53ef\u80fd\u6703\u5728\u76ee\u6a19\u985e\u7684\u5206\u4f48\u4e2d\u8868\u73fe\u51fa\u5f88\u5927\u7684\u4e0d\u5e73\u8861\uff1a\u4f8b\u5982\uff0c\u8ca0\u6a23\u672c\u8207\u6b63\u6a23\u672c\u6bd4\u4f8b\u61f8\u6b8a(\u4fe1\u7528\u5361\u76dc\u5237\u9810\u6e2c\u3001\u96e2\u8077\u54e1\u5de5\u9810\u6e2c)\u3002\u4ee5\u4e0b\u7528\u7d05\u9152\u5206\u985e\u9810\u6e2c\u4f86\u9032\u884c\u793a\u7bc4\uff0c\u9996\u5148\u6211\u5011\u4e0d\u4f7f\u7528 stratify \u96a8\u6a5f\u5207\u5272\u8cc7\u6599\u4e26\u67e5\u770b\u8cc7\u6599\u5207\u5272\u524d\u5f8c\u7684\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b\u3002 import pandas as pd from sklearn.datasets import load_wine from sklearn.model_selection import train_test_split X , y = load_wine ( return_X_y = True ) # \u67e5\u770b\u5168\u90e8\u8cc7\u6599\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y ) . value_counts ( normalize = True ) # \u5168\u90e8\u8cc7\u6599\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.398876 0 0.331461 2 0.269663 dtype: float64 # \u5be6\u9a57\u4e00: \u4e0d\u4f7f\u7528 stratify \u9032\u884c\u5207\u5272\u8cc7\u6599 X_train , X_test , y_train , y_test = train_test_split ( X , y ) # \u67e5\u770b\u8a13\u7df4\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y_train ) . value_counts ( normalize = True ) # \u67e5\u770b\u6e2c\u8a66\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y_test ) . 
value_counts ( normalize = True ) # \u8a13\u7df4\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.390977 0 0.330827 2 0.278195 dtype: float64 # \u6e2c\u8a66\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.511111 0 0.266667 2 0.222222 dtype: float64 \u5f9e\u4e0a\u9762\u5207\u51fa\u4f86\u7684\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u53ef\u4ee5\u767c\u73fe\u4e09\u500b\u985e\u5225\u7684\u8cc7\u6599\u5206\u4f48\u6bd4\u4f8b\u90fd\u4e0d\u540c\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u4f7f\u7528 stratify \u53c3\u6578\u518d\u5207\u5272\u4e00\u6b21\u3002 # \u5be6\u9a57\u4e8c: \u4f7f\u7528 stratify \u9032\u884c\u5207\u5272\u8cc7\u6599 X_train , X_test , y_train , y_test = train_test_split ( X , y , stratify = y ) # \u67e5\u770b\u8a13\u7df4\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y_train ) . value_counts ( normalize = True ) # \u67e5\u770b\u6e2c\u8a66\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y_test ) . value_counts ( normalize = True ) # \u8a13\u7df4\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.400000 0 0.333333 2 0.266667 dtype: float64 # \u6e2c\u8a66\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.398496 0 0.330827 2 0.270677 dtype: float64 \u6211\u5011\u53ef\u4ee5\u767c\u73fe\u5c07 stratify \u8a2d\u7f6e\u70ba\u76ee\u6a19 (y) \u5728\u8a13\u7df4\u548c\u6e2c\u8a66\u96c6\u4e2d\u7522\u751f\u76f8\u540c\u7684\u5206\u4f48\u3002\u56e0\u70ba\u6539\u8b8a\u7684\u985e\u5225\u7684\u6bd4\u4f8b\u662f\u4e00\u500b\u56b4\u91cd\u7684\u554f\u984c\uff0c\u53ef\u80fd\u6703\u4f7f\u6a21\u578b\u66f4\u504f\u5411\u65bc\u7279\u5b9a\u7684\u985e\u5225\u3002\u56e0\u6b64\u8a13\u7df4\u8cc7\u6599\u7684\u5206\u4f48\u5fc5\u9808\u8981\u8207\u5be6\u969b\u60c5\u6cc1\u8d8a\u63a5\u8fd1\u8d8a\u597d\u3002 3. 
\u6c92\u6709\u8cc7\u6599\u8996\u89ba\u5316\u7684\u7fd2\u6163 \u8cc7\u6599\u8996\u89ba\u5316\u7684\u597d\u8655\u591a\u591a\uff0c\u5728\u672c\u7cfb\u5217\u6587\u7ae0 [Day 3] \u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\uff1f\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427\uff01 \u8207 [Day 22] Python \u8996\u89ba\u5316\u89e3\u91cb\u6578\u64da - Plotly Express \u8b1b\u89e3\u4e86\u8a31\u591a Python \u8cc7\u6599\u8996\u89ba\u5316\u7684\u6280\u5de7\u3002\u8cc7\u6599\u8996\u89ba\u5316\u53ef\u4ee5\u5e6b\u52a9\u6211\u5011\u5206\u6790\u8207\u7d71\u8a08\u8cc7\u6599\u7684\u578b\u614b\uff0c\u5f80\u5f80\u6709\u597d\u7684\u8cc7\u6599\u6e05\u6d17\u8207\u524d\u8655\u7406\u5c0d\u6a21\u578b\u9810\u6e2c\u7d50\u679c\u6703\u6709\u5927\u5e45\u7684\u63d0\u5347\u3002\u6709\u8208\u8da3\u7684\u8b80\u8005\u53ef\u4ee5\u53c3\u8003 \u5b89\u65af\u5eab\u59c6\u56db\u91cd\u594f (Anscombe\u2019s quartet) \u3002\u4ed6\u4e3b\u8981\u662f\u900f\u904e\u56db\u500b\u5c0f\u8cc7\u6599\u96c6\u4e26\u900f\u904e\u8996\u89ba\u5316\u8207\u7d71\u8a08\u4f86\u89c0\u5bdf\uff0c\u4e26\u8aaa\u660e\u5728\u5206\u6790\u6578\u64da\u524d\u5148\u7e6a\u88fd\u5716\u8868\u7684\u91cd\u8981\u6027\uff0c\u4ee5\u53ca\u96e2\u7fa4\u503c\u5c0d\u7d71\u8a08\u7684\u5f71\u97ff\u4e4b\u5927\u3002 4. 
\u4f7f\u7528 LabelEncoder \u70ba\u7279\u5fb5\u7de8\u78bc \u901a\u5e38\u6211\u5011\u8981\u70ba\u985e\u5225\u7684\u7279\u5fb5\u9032\u884c\u7de8\u78bc\uff0c\u76f4\u89ba\u6703\u60f3\u5230 Sklearn \u7684 LabelEncoder \u3002\u4f46\u662f\u5982\u679c\u4e00\u500b\u8cc7\u6599\u96c6\u4e2d\u6709\u591a\u500b\u7279\u5fb5\u662f\u5c6c\u65bc\u985e\u5225\u578b\u7684\u8cc7\u6599\uff0c\u8c48\u4e0d\u662f\u5f88\u9ebb\u7169?\u5fc5\u9808\u8981\u4e00\u500b\u4e00\u500b\u547c\u53eb LabelEncoder \u5206\u5225\u70ba\u9019\u4e9b\u7279\u5fb5\u9032\u884c\u8f49\u63db\u3002\u5982\u679c\u4f60\u770b\u5230\u9019\u908a\u6709\u540c\u611f\u7684\uff0c\u5728\u9019\u88e1\u8981\u544a\u8a34\u4f60\u4e8b\u5be6\u4e26\u975e\u5982\u6b64\uff01\u6211\u5011\u770b\u770b \u5728\u5b98\u65b9\u6587\u4ef6\u4e0b LabelEncoder \u7684\u63cf\u8ff0\uff1a This transformer should be used to encode target values, i.e. y, and not the input X. \u7c21\u55ae\u4f86\u8aaa LabelEncoder \u53ea\u662f\u88ab\u7528\u4f86\u7de8\u78bc\u8f38\u51fa\u9805 y \u800c\u5df2\u7684\uff01\u4f60\u9084\u5728\u7528\u5b83\u4f86\u7de8\u78bc\u4f60\u7684\u6bcf\u500b x \u55ce\uff1f\uff08\u6688 \u90a3\u9ebc\u6211\u5011\u8a72\u7528\u4ec0\u9ebc\u65b9\u6cd5\u4f86\u7de8\u78bc\u6709\u9806\u5e8f\u7684\u985e\u5225\u7279\u5fb5\u5462\uff1f\u5982\u679c\u4f60\u4ed4\u7d30\u95b1\u8b80\u6709\u95dc\u7de8\u78bc\u5206\u985e\u7279\u5fb5\u7684 Sklearn \u7528\u6236\u6307\u5357\uff0c\u4f60\u6703\u770b\u5230\u5b83\u6e05\u695a\u5730\u8aaa\u660e\uff1a To convert categorical features to integer codes, we can use the OrdinalEncoder. 
This estimator transforms each categorical feature to one new feature of integers (0 to n_categories - 1) \u770b\u5230\u9019\u908a\u5927\u5bb6\u61c9\u8a72\u77e5\u9053\u95b1\u8b80\u5b98\u65b9\u6587\u4ef6\u7684\u91cd\u8981\u6027\u5427\uff01\u5b98\u65b9\u6587\u4ef6\u4e2d\u5efa\u8b70 x \u9805\u7684\u8f38\u5165\u7279\u5fb5\u53ef\u4ee5\u63a1\u7528 OrdinalEncoder \u4e00\u6b21\u70ba\u6240\u6709\u7279\u5fb5\u4f9d\u5e8f\u505a Label Encoding\u3002OrdinalEncoder \u7de8\u78bc\u5668\u7684\u4f7f\u7528\u65b9\u5f0f\u5982\u4e0b\uff1a from sklearn.preprocessing import OrdinalEncoder enc = OrdinalEncoder () X = [[ 'Male' , 1 ], [ 'Female' , 3 ], [ 'Female' , 2 ]] enc . fit ( X ) print ( enc . categories_ ) enc . transform ([[ 'Female' , 3 ], [ 'Male' , 1 ]]) [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)] array([[0., 2.], [1., 0.]]) \u4ee5\u4e0a\u7684\u7bc4\u4f8b\u662f X \u6709\u4e09\u7b46\u8cc7\u6599\uff0c\u6bcf\u7b46\u8cc7\u6599\u90fd\u6709\u5169\u500b\u7279\u5fb5\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7b2c\u4e00\u500b\u7279\u5fb5\u662f\u6027\u5225 Male \u8207 Female\uff0c\u56e0\u6b64 OrdinalEncoder \u6703\u4f9d\u7167\u5b57\u6bcd\u958b\u982d\u505a\u6392\u5e8f Female \u7de8\u78bc\u70ba 0 \u800c Male \u7de8\u78bc\u70ba 1\u3002\u53e6\u5916\u7b2c\u4e8c\u500b\u7279\u5fb5\u70ba\u6578\u5b57 1\u30012\u30013\uff0c\u540c\u7406\u4f9d\u5e8f\u70ba\u4ed6\u5011\u7de8\u78bc\u6210 0\u30011\u30012\u3002\u53ea\u9700\u95b1\u8b80\u5b98\u65b9\u6587\u6a94\u548c\u7528\u6236\u6307\u5357\uff0c\u4f60\u5c31\u53ef\u4ee5\u4e86\u89e3\u5f88\u591a\u95dc\u65bc Sklearn \u7684\u77e5\u8b58\uff01\u662f\u4e0d\u662f\u5f88\u68d2\uff5e 5. 
\u8cc7\u6599\u8655\u7406\u4e0d\u7576\u5c0e\u81f4\u8cc7\u6599\u6d29\u6f0f \u8cc7\u6599\u6d29\u6f0f (data leakage) \u662f\u500b\u96b1\u5f62\u6bba\u624b\uff0c\u5b83\u6703\u5728\u4e0d\u77e5\u4e0d\u89ba\u4e2d\u5f71\u97ff\u6a21\u578b\u9810\u6e2c\u7d50\u679c\u3002\u5176\u767c\u751f\u7684\u6642\u6a5f\u5728\u65bc\u4f60\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\uff0c\u4e0d\u61c9\u8a72\u5c07\u6e2c\u8a66\u7684\u8cc7\u6599\u7684\u8cc7\u8a0a\u6d29\u6f0f\u5230\u8a13\u7df4\u904e\u7a0b\u4e2d\u3002\u5b83\u6703\u9020\u6210\u6a21\u578b\u7d66\u51fa\u4e00\u500b\u975e\u5e38\u6a02\u89c0\u7684\u7d50\u679c\uff0c\u5373\u4f7f\u5728\u4ea4\u53c9\u9a57\u8b49\u4e2d\u4e5f\u662f\u5982\u6b64\uff0c\u4f46\u5728\u5c0d\u5be6\u969b\u65b0\u6578\u64da\u9032\u884c\u6e2c\u8a66\u6642\u8868\u73fe\u6703\u975e\u5e38\u5730\u7cdf\u7cd5\u3002 \u8cc7\u6599\u6d29\u6f0f\u6700\u5e38\u767c\u751f\u65bc\u8cc7\u6599\u524d\u8655\u7406\u7684\u968e\u6bb5\uff0c\u5c24\u5176\u662f\u7576\u4f60\u7684\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u5c1a\u672a\u5207\u5272\u7684\u6642\u5019\u3002Sklearn \u63d0\u4f9b\u4e86\u8a31\u591a\u8cc7\u6599\u524d\u8655\u7406\u7684\u65b9\u6cd5\uff0c\u4f8b\u5982: \u7f3a\u5931\u503c\u88dc\u503c(imputers)\u3001\u6b63\u898f\u5316 (normalizers)\u3001\u6a19\u6e96\u5316(standardization)\u4ee5\u53ca\u5c0d\u6578(log) \u8f49\u63db...\u7b49\u3002\u9019\u4e9b\u8f49\u63db\u5668\u90fd\u6703\u4f9d\u8cf4\u65bc\u4f60\u8f38\u5165\u8cc7\u6599\u7684\u5206\u4f48\uff0c\u4e26\u4f9d\u7167\u6b64\u5206\u4f48\u505a\u76f8\u5c0d\u61c9\u7684\u64ec\u5408\u3002 \u8209\u4f8b\u4f86\u8aaa\uff0c\u6211\u5011\u5728\u505a\u6a19\u6e96\u5316\u6642(StandardScaler)\u900f\u904e\u5f9e\u6bcf\u7b46\u8cc7\u6599\u4e2d\u6e1b\u53bb\u5e73\u5747\u503c\u4e26\u5c07\u5176\u9664\u4ee5\u6a19\u6e96\u504f\u5dee\u4f86\u7372\u5f97\u7e2e\u653e\u5f8c\u7684\u6578\u64da\u3002\u6211\u5011\u4f7f\u7528 fit() \u65b9\u6cd5\u5728\u6240\u6709\u8cc7\u6599\u96c6 X 
\u4e0a\u505a\u8f49\u63db\uff0c\u4e26\u4f7f\u5f97\u8f49\u63db\u5668\u5b78\u7fd2\u6bcf\u500b\u7279\u5fb5\u7684\u6574\u500b\u5206\u4f48\u7684\u5e73\u5747\u503c\u548c\u6a19\u6e96\u5dee\u3002\u9019\u4e9b\u8cc7\u6599\u8f49\u63db\u5f8c\u5982\u679c\u518d\u5c07\u9019\u4e9b\u6578\u64da\u62c6\u5206\u70ba\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\uff0c\u5247\u8a13\u7df4\u96c6\u6703\u53d7\u5230\u6c61\u67d3\u3002\u56e0\u70ba StandardScaler \u5f9e\u5be6\u969b\u5206\u4f48\u4e2d\u6d29\u9732\u4e86\u6e2c\u8a66\u96c6\u91cd\u8981\u8a0a\u606f\uff0c\u4e00\u822c\u4f86\u8aaa\u6211\u5011\u4e0d\u80fd\u5c07\u6e2c\u8a66\u96c6\u7684\u5206\u4f48\u60c5\u6cc1\u8207\u8a13\u7df4\u96c6\u6df7\u5728\u4e00\u8d77\u3002\u96d6\u7136\u6211\u5011\u5e0c\u671b\u8a13\u7df4\u96c6\u7684\u5206\u4f48\u8207\u5be6\u969b\u6e2c\u8a66\u96c6\u7684\u5206\u4f48\u8981\u8d8a\u63a5\u8fd1\u8d8a\u597d\uff0c\u56e0\u70ba\u4f7f\u5f97\u6a21\u578b\u8868\u73fe\u7d50\u679c\u7a69\u5b9a\u3002 \u96d6\u7136\u6211\u5011\u628a\u6e2c\u8a66\u96c6\u8207\u8a13\u7df4\u96c6\u6df7\u5728\u4e00\u8d77\u4e26\u505a\u8f49\u63db\uff0c\u9019\u4e00\u6b65\u9a5f\u5c0d\u6211\u5011\u4f86\u8aaa\u53ef\u80fd\u6c92\u4ec0\u9ebc\u3002\u4f46\u662f\u5c0d\u65bc Sklearn \u5f37\u5927\u7684\u6f14\u7b97\u6cd5\uff0c\u53ef\u80fd\u6703\u900f\u904e\u9019\u500b\u907a\u6f0f\u6e2c\u8a66\u96c6\u7684\u5206\u4f48\u7684\u8a0a\u606f\u628a\u6a21\u578b\u64ec\u5408\u7684\u5f88\u597d\u3002\u5c46\u6642\u6a21\u578b\u8a13\u7df4\u5b8c\u6210\u5f8c\uff0c\u6e2c\u8a66\u96c6\u4e0d\u5920\u65b0\u7a4e\uff0c\u7121\u6cd5\u5728\u5be6\u969b\u770b\u4e0d\u898b\u7684\u6578\u64da\u4e0a\u6e2c\u8a66\u6a21\u578b\u7684\u6027\u80fd\u3002 \u6700\u7c21\u55ae\u7684\u89e3\u6c7a\u8fa6\u6cd5\uff0c\u5c31\u662f\u4e0d\u8981\u4f7f\u7528 fit() 
\u4e00\u6b21\u8f49\u63db\u6240\u6709\u7684\u8cc7\u6599\u3002\u5728\u505a\u4efb\u4f55\u8cc7\u6599\u8f49\u63db\u4e4b\u524d\u8981\u5148\u78ba\u4fdd\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u5df2\u7d93\u5b8c\u6574\u5730\u88ab\u5207\u958b\u3002\u5373\u4f7f\u5207\u958b\u5f8c\u4e5f\u4e0d\u8981\u518d\u62ff\u6e2c\u8a66\u96c6\u547c\u53eb fit() \u6216 fit_transform() \uff0c\u9019\u4e00\u6a23\u6703\u5c0e\u81f4\u76f8\u540c\u554f\u984c\u767c\u751f\u3002\u56e0\u70ba\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u5fc5\u9808\u9032\u884c\u76f8\u540c\u7684\u8f49\u63db\uff0c\u4f9d\u7167\u5b98\u65b9\u7684\u7bc4\u4f8b\u6211\u5011\u5fc5\u9808\u5148\u4f7f\u7528 fit_transform() \u5728\u8a13\u7df4\u96c6\u4e0a\u9032\u884c\u64ec\u5408\u8207\u8f49\u63db\u3002\u9019\u78ba\u4fdd\u4e86\u8f49\u63db\u5668\u50c5\u5f9e\u8a13\u7df4\u96c6\u5b78\u7fd2\uff0c\u5f9e\u4e2d\u627e\u51fa\u53c3\u6578\u4f8b\u5982\u5e73\u5747\u503c\u8207\u8b8a\u7570\u6578\u4e26\u540c\u6642\u5c0d\u5176\u9032\u884c\u8b8a\u63db\u3002\u63a5\u8457\u4f7f\u7528 transform() \u65b9\u6cd5\u5728\u6e2c\u8a66\u8cc7\u6599\u4e0a\u9032\u884c\u8f49\u63db\uff0c\u6839\u64da\u5f9e\u8a13\u7df4\u6578\u64da\u4e2d\u5b78\u5230\u7684\u8a0a\u606f\u9032\u884c\u8f49\u63db\u3002 from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler X , y = load_iris ( return_X_y = True ) X_train , X_test , y_train , y_test = train_test_split ( X , y , stratify = y , random_state = 44 ) scaler = StandardScaler () X_train_scaled = scaler . fit_transform ( X_train ) X_test_scaled = scaler . 
transform ( X_test ) \u66f4\u5f37\u5927\u7684\u89e3\u6c7a\u65b9\u6848\u662f\u4f7f\u7528 Sklearn \u5167\u5efa\u7684 pipeline\uff0c\u5b83\u80fd\u5920\u4fdd\u8b77\u6a21\u578b\u514d\u65bc\u8cc7\u6599\u6d29\u6f0f\u7684\u554f\u984c\u3002\u6b64\u65b9\u6cd5\u80fd\u5920\u78ba\u4fdd\u8a13\u7df4\u8cc7\u6599\u50c5\u53c3\u8207\u8f49\u63db\u64ec\u5408\u8207\u6a21\u578b\u8a13\u7df4\uff0c\u800c\u6e2c\u8a66\u8cc7\u6599\u50c5\u7528\u65bc\u8a08\u7b97\u4e26\u9a57\u8b49\u6a21\u578b\u3002 6. \u50c5\u4f7f\u7528\u6e2c\u8a66\u96c6\u8a55\u4f30\u6a21\u578b\u597d\u58de \u5982\u679c\u4f60\u7684\u6e2c\u8a66\u8cc7\u6599 R2 score \u5f97\u5230\u4e86 0.85 \u5c31\u4ee3\u8868\u5f88\u597d\u4e86\u55ce\uff1f\u4e0d\u76e1\u7136\uff01\u5118\u7ba1\u6709\u9ad8\u7684\u6e2c\u8a66\u5206\u6578\u901a\u5e38\u610f\u5473\u8457\u6a21\u578b\u8868\u73fe\u4f73\uff0c\u4f46\u5728\u89e3\u91cb\u6e2c\u8a66\u7d50\u679c\u6642\u4ecd\u6709\u4e00\u4e9b\u91cd\u8981\u7684\u6ce8\u610f\u4e8b\u9805\u3002\u9996\u5148\u6700\u91cd\u8981\u7684\uff0c\u7121\u8ad6\u5206\u6578\u503c\u5982\u4f55\u6e2c\u8a66\u96c6\u7684\u5206\u6578\u4e00\u5b9a\u8981\u8207\u8a13\u7df4\u96c6\u76f8\u6bd4\u8f03\u624d\u80fd\u78ba\u4fdd\u6a21\u578b\u8a13\u7df4\u597d\u8207\u58de\u3002\u7576\u4f60\u7684\u6a21\u578b\u8a13\u7df4\u96c6\u5206\u6578\u9ad8\u65bc\u6e2c\u8a66\u96c6\u7684\u5206\u6578\uff0c\u4e26\u4e14\u5169\u8005\u90fd\u8db3\u5920\u9ad8\u4ee5\u6eff\u8db3\u5c08\u6848\u7684\u76ee\u6a19\u671f\u671b\u6642\u9019\u4ee3\u8868\u4f60\u8a13\u7df4\u4e86\u4e00\u500b\u597d\u6a21\u578b\u3002\u7136\u800c\u9019\u4e26\u4e0d\u610f\u5473\u8457\u8a13\u7df4\u548c\u6e2c\u8a66\u5206\u6578\u4e4b\u9593\u7684\u5dee\u7570\u8d8a\u5927\u8d8a\u597d\u3002\u8209\u500b\u4f8b\u5b50\uff0c\u82e5\u8a13\u7df4\u96c6\u7684 R2 score \u70ba 0.85 \u6e2c\u8a66\u96c6\u70ba 0.8 \u5373\u4ee3\u8868\u6a21\u578b\u65e2\u4e0d\u904e\u5ea6\u64ec\u5408(overfit)\u4e5f\u4e0d\u6b20\u64ec\u5408(underfit)\u3002\u4f46\u662f\u5982\u679c\u8a13\u7df4\u96c6 0.9 \u6e2c\u8a66\u96c6 0.7 
\u7684\u6642\u5019\uff0c\u4f60\u7684\u6a21\u578b\u5c31\u662f\u904e\u64ec\u5408\u3002\u5176\u539f\u56e0\u662f\u8a72\u6a21\u578b\u6c92\u6709\u5728\u8a13\u7df4\u671f\u9593\u9032\u884c\u6cdb\u5316\uff0c\u800c\u662f\u8a18\u4f4f\u4e86\u4e00\u4e9b\u8a13\u7df4\u6578\u64da\uff0c\u5f9e\u800c\u5c0e\u81f4\u6e2c\u8a66\u5206\u6578\u4f4e\u5f97\u591a\u3002 \u5728\u5927\u591a\u6578\u4efb\u52d9\u4e2d\u4f60\u5c07\u6703\u770b\u5230\u8a31\u591a\u4eba\u4f7f\u7528 tree-based \u6a21\u578b\u6216\u662f\u6574\u9ad4\u5b78\u7fd2\u6a21\u578b (ensemble models)\u3002\u4f8b\u5982\u5728\u96a8\u6a5f\u68ee\u6797\u6f14\u7b97\u6cd5\u7576\u4e2d\u5982\u679c\u5b83\u5011\u7684\u6a39\u6df1\u5ea6\u592a\u6df1\uff0c\u5f80\u5f80\u6703\u7372\u5f97\u975e\u5e38\u9ad8\u7684\u8a13\u7df4\u5206\u6578\uff0c\u5f9e\u800c\u5c0e\u81f4\u904e\u5ea6\u64ec\u5408\u3002\u53e6\u5916\u4e5f\u6709\u6e2c\u8a66\u96c6\u7684\u5206\u6578\u6bd4\u8a13\u7df4\u96c6\u9ad8\u7684\u60c5\u6cc1\uff0c\u82e5\u767c\u751f\u6b64\u60c5\u6cc1\u6642\u901a\u5e38\u90fd\u6703\u611f\u89ba\u662f\u4e0d\u662f\u505a\u932f\u4e86\u4ec0\u9ebc\u3002\u9019\u7a2e\u60c5\u6cc1\u7684\u4e3b\u8981\u539f\u56e0\u662f\u8cc7\u6599\u6d29\u6f0f\uff0c\u4e5f\u5c31\u662f\u4e0a\u4e00\u7bc0\u6211\u5011\u8a0e\u8ad6\u7684\u60c5\u6cc1\u3002\u6216\u662f\u4f60\u7684\u6e2c\u8a66\u8cc7\u6599\u7b46\u6578\u592a\u5c11\uff0c\u6c92\u8fa6\u6cd5\u8db3\u4ee5\u9a57\u8b49\u6a21\u578b\u597d\u58de\u3002 \u53e6\u5916\u6709\u6642\u5019\u6211\u5011\u4e5f\u6703\u5f97\u5230\u5728\u8a13\u7df4\u96c6\u6709\u5f88\u597d\u7684\u8868\u73fe\u4f46\u6e2c\u8a66\u96c6\u7121\u6575\u5dee\u7684\u60c5\u6cc1\u3002\u7576\u8a13\u7df4\u548c\u6e2c\u8a66\u5206\u6578\u5dee\u7570\u5f88\u5927\u6642\uff0c\u554f\u984c\u5f80\u5f80\u8207\u6e2c\u8a66\u96c6\u6709\u95dc\u800c\u4e0d\u662f\u904e\u5ea6\u64ec\u5408\u3002\u9019\u6642\u5019\u4f60\u53ef\u80fd\u8981\u6aa2\u67e5\u8cc7\u6599\u9810\u8655\u7406\u7684\u65b9\u5f0f\u662f\u5426\u4e00\u81f4 (\u50cf\u662f\u53d6 log \u6216 
scale)\uff0c\u6216\u662f\u53ea\u662f\u5fd8\u8a18\u5c0d\u6e2c\u8a66\u96c6\u505a\u8f49\u63db\u8655\u7406\u3002 \u9019\u88e1\u505a\u4e00\u500b\u5c0f\u7d50\uff0c\u7e3d\u4e4b\u5728\u8a13\u7df4\u597d\u6a21\u578b\u6642\u8acb\u4ed4\u7d30\u6aa2\u67e5\u8a13\u7df4\u548c\u6e2c\u8a66\u5206\u6578\u4e4b\u9593\u7684\u5dee\u8ddd\u3002\u4e26\u4e14\u53ef\u4ee5\u900f\u904e\u6b64\u8a55\u4f30\u65b9\u5f0f\u6aa2\u8996\u6a21\u578b\u662f\u5426\u904e\u64ec\u5408\uff0c\u540c\u6642\u4e5f\u80fd\u9032\u884c\u6a21\u578b\u8abf\u53c3\u6216\u662f\u9078\u64c7\u6700\u4f73\u7684\u8cc7\u6599\u9810\u8655\u7406\u65b9\u5f0f\u3002\u4e26\u70ba\u6700\u7d42\u7684\u6a21\u578b\u505a\u6700\u4f73\u7684\u6e96\u5099\u3002 7. \u5728\u6c92\u6709\u4ea4\u53c9\u9a57\u8b49\u7684\u60c5\u6cc1\u4e0b\u5224\u65b7\u6a21\u578b\u6027\u80fd \u6211\u60f3\u5927\u5bb6\u61c9\u8a72\u90fd\u719f\u7df4\u638c\u63e1\u4e86 overfitting \u9019\u500b\u8b70\u984c\u3002\u9019\u662f\u6a5f\u5668\u5b78\u7fd2\u4e2d\u4e00\u500b\u8feb\u5207\u554f\u984c\uff0c\u4e26\u5df2\u7d93\u8a2d\u8a08\u4e86\u7121\u6578\u500b\u65b9\u6cd5\u4f86\u89e3\u6c7a\u5b83\u3002\u6700\u57fa\u672c\u7684\u65b9\u6cd5\u662f\u5c07\u4e00\u90e8\u5206\u6578\u64da\u4f5c\u70ba\u6e2c\u8a66\u96c6\u4f86\u6a21\u64ec\u548c\u6e2c\u91cf\u6a21\u578b\u5728\u770b\u4e0d\u898b\u7684\u6578\u64da\u4e0a\u7684\u6027\u80fd\u3002\u4f46\u662f\u6211\u5011\u53ef\u4ee5\u8abf\u6574\u6a21\u578b\u7684\u8d85\u53c3\u6578\uff0c\u76f4\u5230\u6a21\u578b\u5728\u8a72\u7279\u5b9a\u6e2c\u8a66\u96c6\u4e0a\u9054\u5230\u6700\u9ad8\u5206\u6578\uff0c\u9019\u53c8\u610f\u5473\u8457\u67d0\u7a2e\u542b\u7fa9\u7684\u904e\u5ea6\u64ec\u5408\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u5c07\u5b8c\u6574\u6578\u64da\u7684\u53e6\u4e00\u90e8\u5206\u4f5c\u70ba \u9a57\u8b49\u96c6 
\u518d\u6b21\u89e3\u6c7a\u9019\u500b\u554f\u984c\u3002\u6a21\u578b\u5c07\u5728\u8a13\u7df4\u6578\u64da\u4e0a\u9032\u884c\u8a13\u7df4\uff0c\u4e26\u5728\u9a57\u8b49\u96c6\u4e0a\u5fae\u8abf\u5176\u53c3\u6578\uff0c\u4e26\u5728\u6e2c\u8a66\u96c6\u4e0a\u9032\u884c\u6700\u7d42\u8a55\u4f30\u3002 \u4f46\u662f\u5c07\u6211\u5011\u5bf6\u8cb4\u7684\u6578\u64da\u5206\u6210\u4e09\u7d44\u610f\u5473\u8457\u6a21\u578b\u53ef\u4ee5\u5b78\u7fd2\u7684\u6578\u64da\u91cf\u66f4\u5c11\u3002\u6b64\u5916\u6a21\u578b\u7684\u6574\u9ad4\u9810\u6e2c\u6027\u80fd\u5c07\u53d6\u6c7a\u65bc\u90a3\u5c0d\u7279\u5b9a\u7684\u8a13\u7df4\u96c6\u548c\u9a57\u8b49\u96c6\u3002\u56e0\u6b64\u5728\u9032\u884c\u6a5f\u5668\u5b78\u7fd2\u6642\u6700\u5e38\u4f7f\u7528 K-Fold cross-validation \u89e3\u6c7a\u4e0a\u8ff0\u554f\u984c\u3002\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u6211\u7684\u524d\u5169\u5929\u6587\u7ae0 [Day 25] \u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb \u4ee5\u53ca [Day 26] \u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation \u3002\u6839\u64da\u6211\u5011\u8a2d\u5b9a\u7684 K \u503c\uff0c\u53ef\u4ee5\u5b8c\u6574\u7684\u5c07\u6578\u64da\u88ab\u5206\u6210 K \u7d44 folds\uff0c\u5c0d\u65bc\u6bcf\u500b folds \u6bcf\u6b21\u6a21\u578b\u8a13\u7df4\u6703\u628a K-1 \u7d44\u4f5c\u70ba\u8a13\u7df4\u96c6\uff0c\u800c\u5269\u4e0b\u7684\u88ab\u6b78\u985e\u70ba\u9a57\u8b49\u96c6\u3002\u7576\u6a21\u578b\u4ea4\u53c9\u9a57\u8b49\u7d50\u675f\u5f8c\uff0c\u8a13\u7df4\u96c6\u6240\u6709\u8cc7\u6599\u6703\u88ab\u5b8c\u6574\u7684\u8a13\u7df4\u3002 8. 
\u5206\u985e\u554f\u984c\u50c5\u4f7f\u7528\u6e96\u78ba\u7387\u4f5c\u70ba\u8861\u91cf\u6a21\u578b\u7684\u6307\u6a19 \u5728\u9810\u8a2d\u7684\u60c5\u6cc1\u4e0b\u6240\u6709 Sklearn \u5206\u985e\u5668\u5728\u547c\u53eb score() \u51fd\u6578\u6642\u90fd\u4f7f\u7528\u6e96\u78ba\u5ea6\u4f5c\u70ba\u8a55\u5206\u65b9\u6cd5\u3002\u7531\u65bc\u6e96\u78ba\u7387\u7684\u8a08\u7b97\u65b9\u5f0f\u7c21\u55ae\u8207\u5bb9\u6613\u7406\u89e3\uff0c\u56e0\u6b64\u7d93\u5e38\u6703\u770b\u5230\u521d\u5b78\u8005\u5ee3\u6cdb\u4f7f\u7528\u5b83\u4f86\u5224\u65b7\u5176\u6a21\u578b\u7684\u6027\u80fd\u3002\u4e0d\u5e78\u7684\u662f\u9019\u7a2e\u4e00\u822c\u6e96\u78ba\u7387\u7684\u8a55\u4f30\u65b9\u5f0f\u53ea\u5c0d\u985e\u5225\u5e73\u8861\u7684\u4e8c\u5143\u5206\u985e\u554f\u984c\u6709\u7528\u3002 \u7136\u800c\u5728\u5176\u4ed6\u7684\u72c0\u6cc1\u4e0b\u5b83\u662f\u4e00\u500b\u8aa4\u5c0e\u6027\u7684\u6307\u6a19\uff0c\u5373\u4f7f\u662f\u8868\u73fe\u6700\u5dee\u7684\u6a21\u578b\u4e5f\u53ef\u80fd\u80cc\u5f8c\u96b1\u85cf\u8457\u9ad8\u6e96\u78ba\u5ea6\u7684\u5206\u6578\u3002\u8209\u4f8b\u4f86\u8aaa\u6709\u500b\u5075\u6e2c\u5783\u573e\u90f5\u4ef6\u7684\u6a21\u578b\u5b83\u7684\u6e96\u78ba\u7387 90%\uff0c\u4f46\u662f\u5be6\u969b\u4e0a\u5b83\u6839\u672c\u7121\u6cd5\u5075\u6e2c\u5230\u5783\u573e\u90f5\u4ef6\u3002\u9019\u662f\u70ba\u4ec0\u9ebc\uff1f\u7531\u65bc\u5783\u573e\u90f5\u4ef6\u4e26\u4e0d\u5e38\u898b\uff0c\u5206\u985e\u5668\u53ef\u4ee5\u6aa2\u6e2c\u6240\u6709\u975e\u5783\u573e\u90f5\u4ef6\uff0c\u5373\u4f7f\u5206\u985e\u5668\u5b8c\u5168\u7121\u6cd5\u9054\u5230\u5176\u76ee\u7684\u9019\u4e5f\u53ef\u4ee5\u63d0\u9ad8\u5176\u6e96\u78ba\u6027\u3002\u56e0\u70ba\u9019\u500b\u5206\u985e\u5668\u50c5\u53ef\u4ee5\u5206\u985e\u9019\u4e9b\u6b63\u5e38\u90f5\u4ef6\uff0c\u7a00\u5c11\u7684\u5783\u573e\u90f5\u4ef6\u6839\u672c\u8fa8\u8a8d\u4e0d\u51fa\u4f86\u3002 
\u5c0d\u65bc\u591a\u5143\u985e\u5206\u985e\u7684\u554f\u984c\u66f4\u662f\u61c9\u8a72\u6ce8\u610f\u4f60\u7684\u6a21\u578b\u8a55\u4f30\u6307\u6a19\u3002\u5982\u679c\u9054\u5230 80% \u7684\u6e96\u78ba\u7387\uff0c\u662f\u5426\u610f\u5473\u8457\u6a21\u578b\u5728\u9810\u6e2c\u985e\u52251\u3001\u985e\u52252\u3001\u985e\u52253\u751a\u81f3\u6240\u6709\u985e\u6642\u4e00\u6a23\u6e96\u78ba\u5462\uff1f\u4e00\u822c\u7684\u6e96\u78ba\u7387\u6c38\u9060\u7121\u6cd5\u56de\u7b54\u6b64\u985e\u554f\u984c\uff0c\u4f46\u5e78\u904b\u7684\u662f\u5176\u4ed6\u5206\u985e\u6307\u6a19\u63d0\u4f9b\u4e86\u66f4\u591a\u7684\u8a0a\u606f\u6307\u6a19\u3002\u5b83\u5c31\u662f \u6df7\u6dc6\u77e9\u9663 (confusion matrix)\u3002 from sklearn.metrics import confusion_matrix y_true = [ 2 , 0 , 2 , 2 , 0 , 1 ] y_pred = [ 0 , 0 , 2 , 2 , 0 , 2 ] confusion_matrix ( y_true , y_pred ) array([[2, 0, 0], [0, 0, 1], [1, 0, 2]]) \u7d44\u6210\u6df7\u6dc6\u77e9\u9663\u7684\u56db\u500b\u5143\u7d20\u5206\u5225\u6709 TP\u3001TN\u3001FP\u3001FN\u3002\u57fa\u672c\u4e0a\u6df7\u6dc6\u77e9\u9663\u6703\u62ff\u9019\u56db\u500b\u6307\u6a19\u505a\u53c3\u8003\uff0c\u540c\u6642\u7b97\u51fa\u4f86\u7684\u5206\u6578\u4e5f\u66f4\u80fd\u53bb\u8a55\u4f30\u4f60\u7684\u6a21\u578b\u8a13\u7df4\u7684\u7d50\u679c\u3002\u6b64\u5916\u6211\u5011\u53ef\u4ee5\u5229\u7528\u6df7\u6dc6\u77e9\u9663\u4f86\u8a08\u7b97 Precision\u3001Recall\u3001Accuracy \u7b49\u5206\u6578\u3002 TP(True Positive): \u6b63\u78ba\u9810\u6e2c\u6210\u529f\u7684\u6b63\u6a23\u672c\uff0c\u4f8b\u5982\u771f\u5be6\u7b54\u6848(Ground True)\u662f\u8c93\uff0c\u6210\u529f\u7684\u628a\u4e00\u5f35\u8c93\u7684\u7167\u7247\u9810\u6e2c\u6210\u8c93\uff0c\u5373\u70baTP TN(True Negative): \u6b63\u78ba\u9810\u6e2c\u6210\u529f\u7684\u8ca0\u6a23\u672c\uff0c\u6210\u529f\u7684\u628a\u4e00\u5f35\u72d7\u7684\u7167\u7247\u6a19\u793a\u6210\u4e0d\u662f\u8c93\uff0c\u5373\u70baTN FP(False Positive): 
\u932f\u8aa4\u9810\u6e2c\u6210\u6b63\u6a23\u672c\uff0c\u5be6\u969b\u4e0a\u70ba\u8ca0\u6a23\u672c\uff0c\u4f8b\u5982\uff1a\u932f\u8aa4\u7684\u628a\u4e00\u5f35\u72d7\u7684\u7167\u7247\u9810\u6e2c\u6210\u8c93 FN(False Negative): \u932f\u8aa4\u9810\u6e2c\u6210\u8ca0\u6a23\u672c\uff0c\u5be6\u969b\u4e0a\u70ba\u6b63\u6a23\u672c\uff0c\u4f8b\u5982\uff1a\u932f\u8aa4\u7684\u628a\u4e00\u5f35\u8c93\u7684\u7167\u7247\u9810\u6e2c\u6210\u4e0d\u662f\u8c93 9. \u8ff4\u6b78\u554f\u984c\u50c5\u4f7f\u7528 R2 \u5206\u6578\u8a55\u4f30\u6a21\u578b\u597d\u58de \u5728\u9810\u6e2c\u9023\u7e8c\u6027\u6578\u503c\u8f38\u51fa\u7684\u8ff4\u6b78\u6a21\u578b\u4e2d\uff0c\u5927\u5bb6\u5f80\u5f80\u6703\u76f4\u63a5\u547c\u53eb\u6a21\u578b\u63d0\u4f9b\u7684\u8a55\u4f30\u65b9\u6cd5\u76f4\u63a5\u8a08\u7b97 score \u3002\u7136\u800c\u9019\u500b\u5206\u6578\u5728\u8ff4\u6b78\u6a21\u578b\u4e2d\u662f\u8a08\u7b97 R2 \u5206\u6578\uff0c\u53c8\u7a31\u5224\u5b9a\u4fc2\u6578 (coefficient of determination)\u3002\u6240\u8b02\u7684\u5224\u5b9a\u4fc2\u6578\u662f\u8f38\u5165\u7279\u5fb5 (x) \u53bb\u89e3\u91cb\u8f38\u51fa (y) \u7684\u8b8a\u7570\u7a0b\u5ea6\u6709\u591a\u5c11\uff0c\u5176\u8a08\u7b97\u516c\u5f0f\u662f\uff1a\u8ff4\u6b78\u6a21\u578b\u7684\u8b8a\u7570\u91cf (SSR)/\u7e3d\u8b8a\u7570\u91cf (TSS) \u3002\u7528\u4ee5\u4e0b\u8b8a\u7570\u6578\u5206\u6790\u8868\uff08ANOVA table\uff09\u4f86\u8aaa TSS \u5c31\u662f\u8a08\u7b97\u7e3d\u8b8a\u7570\uff0c\u628a\u6bcf\u500b\u5be6\u969b\u7684 y \u6e1b\u53bb\u5e73\u5747\u6578\u7684\u5e73\u65b9\u52a0\u7e3d\u8d77\u4f86\u3002\u800c SSR \u5c31\u662f\u628a\u6240\u6709\u7684\u6a21\u578b\u9810\u6e2c y \u6e1b\u53bb\u5e73\u5747\u6578\u7684\u5e73\u65b9\u52a0\u7e3d\u8d77\u4f86\u3002\u5982\u679c R2 \u5206\u6578\u5f88\u9ad8\u8d8a\u63a5\u8fd1 1\uff0c\u8868\u793a\u6a21\u578b\u7684\u89e3\u91cb\u80fd\u529b\u5f88\u9ad8\u3002 \u5728\u5b78\u8853\u7814\u7a76\u4e0a\u6700\u76f4\u89ba\u7684\u89c0\u5ff5\u662f R2 \u5206\u6578\u6108\u63a5\u8fd1 1 
\u8d8a\u597d\uff0c\u4e5f\u6709\u4e9b\u4eba\u900f\u904e\u4e00\u4e9b\u624b\u6bb5\u4f86\u88fd\u9020 R2 \u5206\u6578\u5f88\u9ad8\u7684\u5047\u8c61\uff0c\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u9019\u7bc7 \u6587\u7ae0 \u3002\u5176\u5be6\u53ea\u900f\u904e R2 \u9019\u500b\u8a55\u4f30\u6307\u6a19\u5c31\u4f86\u6c7a\u5b9a\u4e00\u500b\u6a21\u578b\u7684\u597d\u58de\u662f\u4e0d\u592a\u597d\u7684\u7fd2\u6163\u3002\u66f4\u9032\u4e00\u6b65\u53ef\u4ee5\u4f7f\u7528 MSE\u3001MAE \u7b49\u6b98\u5dee\u7684\u8a55\u4f30\u6307\u6a19\u4f86\u770b\u6bcf\u7b46\u8cc7\u6599\u5be6\u969b\u503c\u8207\u9810\u6e2c\u503c\u7684\u8aa4\u5dee\u3002\u6216\u662f\u4f7f\u7528\u76f8\u5c0d\u8aa4\u5dee\u4f86\u89c0\u5bdf\u9810\u6e2c\u6a21\u578b\u7684\u53ef\u4fe1\u5ea6\u3002\u6b64\u5916\u7b46\u8005\u9084\u5efa\u8b70\u53ef\u4ee5\u8a66\u8457\u628a\u6bcf\u7b46\u8cc7\u6599\u7684\u771f\u5be6 y \u8207\u6a21\u578b\u9810\u6e2c\u7684 \u0177 \u7e6a\u88fd\u51fa\u4f86\uff0c\u82e5\u5448\u73fe\u4e00\u689d\u660e\u986f\u7684\u7531\u5de6\u4e0b\u5230\u53f3\u4e0a\u659c\u76f4\u7dda\uff0c\u5247\u8868\u793a\u6a21\u578b\u6240\u9810\u6e2c\u7684\u7d50\u679c\u8207\u771f\u5be6\u7b54\u6848\u5f88\u76f8\u8fd1\u3002 10. 
\u4efb\u4f55\u4e8b\u60c5\u5225\u6025\u8457\u60f3\u7528 AI \u89e3\u6c7a \u8fd1\u5e7e\u5e74 AI \u7684\u767c\u5c55\u60f3\u5fc5\u5927\u5bb6\u6709\u76ee\u5171\u7779\uff0c\u5f9e\u5f71\u50cf\u8b58\u5225\u5230\u7269\u4ef6\u8fa8\u8b58\u7684\u6280\u8853\u6709\u8457\u91cd\u5927\u7684\u9032\u5c55\u3002\u6b64\u5916 2016 \u5e74 Google Deepmind \u5718\u968a\u7684 AlphaGo \u9996\u5ea6\u6253\u6557\u4eba\u985e\uff0c\u9019\u4e5f\u5728\u4eba\u6a5f\u5c0d\u5f08\u4e0a\u958b\u555f\u4e86\u4e00\u9805\u91cd\u8981\u7684\u91cc\u7a0b\u7891\u3002\u751a\u81f3\u5728\u81ea\u7136\u8a9e\u8a00\u65b9\u9762\uff0c\u6b78\u529f\u65bc\u65b0\u7684\u6a21\u578b\u67b6\u69cb\u8207\u786c\u9ad4\u8cc7\u6e90\u7684\u9032\u6b65\uff0c\u4f7f\u5f97\u81ea\u7136\u8a9e\u8a00\u6709\u91cd\u5927\u7684\u7a81\u7834\u3002\u770b\u5230\u9019\u9ebc\u591a AI \u7684\u7f8e\u597d\u8b93\u5927\u5bb6\u518d\u6b21\u5c0d\u6df1\u5ea6\u5b78\u7fd2\u9ede\u71c3\u5e0c\u671b\uff01\u53ea\u4e0d\u904e AI \u4e26\u975e\u842c\u80fd\uff0c\u5207\u8a18\uff01\u6240\u6709\u7684\u554f\u984c\u4e26\u4e0d\u662f\u5c07\u8cc7\u6599\u6536\u96c6\u597d\uff0c\u4e26\u5c07\u8cc7\u6599\u4e1f\u7d66\u96fb\u8166\u5b78\u7fd2\u5c31\u6703\u5f97\u5230\u4f60\u60f3\u8981\u7684\u7d50\u679c\u3002\u5927\u5bb6\u4e5f\u8a31\u6703\u9677\u5165\u300c\u70ba AI \u800c AI\u300d \u7684\u8ff7\u601d\uff0c\u5f88\u591a\u7684\u4efb\u52d9\u5176\u5be6\u900f\u904e\u5177\u6709\u898f\u5247\u7684\u5c08\u5bb6\u7cfb\u7d71\u6216\u662f\u50b3\u7d71\u6f14\u7b97\u6cd5\u5c31\u53ef\u4ee5\u9054\u5230\u5f88\u4e0d\u932f\u7684\u7d50\u679c\u3002\u518d\u8005\u6211\u5011\u90fd\u5c0d AI \u7684\u6280\u8853\u611f\u5230\u7279\u5225\u6b61\u559c\u8207\u671f\u5f85\uff0c\u4f46\u662f AI \u7684\u9ed1\u76d2\u5b50\u4eba\u985e\u5f80\u5f80\u4e0d\u77e5\u9053\u6a21\u578b\u4e0b\u4e00\u6b65\u6703\u7522\u751f\u4ec0\u9ebc\u4e0d\u53ef\u9810\u671f\u7684\u7d50\u679c\u3002\u5176\u5be6 AI 
\u6709\u5f88\u591a\u7684\u9650\u5236\u8207\u6311\u6230\uff0c\u9664\u4e86\u5efa\u7acb\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u4ee5\u5916\uff0c\u6211\u5011\u66f4\u9700\u8981\u95dc\u6ce8\u7684\u662f\u6a21\u578b\u5728\u60f3\u4ec0\u9ebc\u3002\u53ef\u89e3\u91cb\u4eba\u5de5\u667a\u6167\u5fc5\u7136\u662f\u6211\u5011\u8981\u63a2\u8a0e\u7684\u4e00\u6bb5\u8ab2\u984c\u3002AI \u8207\u6a5f\u5668\u4eba\u7684\u51fa\u73fe\u4e26\u4e0d\u662f\u8981\u53d6\u4ee3\u4eba\u985e\uff0c\u6211\u8a8d\u70ba AI \u6bd4\u8f03\u9069\u5408\u626e\u6f14\u8f14\u52a9\u4eba\u985e\u7684\u91cd\u8981\u89d2\u8272\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 27] \u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#day-27","text":"","title":"[Day 27] \u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#_1","text":"\u63a2\u8a0e\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u7684\u5341\u500b\u932f\u8aa4","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#_2","text":"\u4eba\u5de5\u667a\u6167\u8fd1\u5e74\u4f86\u6210\u70ba\u4efb\u4f55\u7522\u696d\u71b1\u9580\u7684\u8a71\u984c\u4e4b\u4e00\uff0c\u5404\u516c\u53f8\u7a4d\u6975\u5730\u5c0e\u5165\u6a5f\u5668\u5b78\u7fd2\u6280\u8853\u5354\u52a9\u7522\u696d AI \u5316\u3002\u4f8b\u5982\uff1a\u667a\u6167\u91ab\u7642\u3001\u667a\u6167\u4ea4\u901a\u3001\u667a\u6167\u88fd\u9020......\u7b49\u3002\u6b63\u662f\u56e0\u70ba AI \u6280\u8853\u7684\u5275\u65b0\u8207\u666e\u53ca\uff0c\u8a13\u7df4\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u518d\u4e5f\u4e0d\u662f\u7406\u5de5\u80cc\u666f\u7684\u4eba\u624d\u80fd\u505a\u7684\u4e8b\u3002\u6b64\u5916\u96a8\u8457 Python 
\u958b\u767c\u793e\u7fa4\u8301\u58ef\uff0c\u8a31\u591a\u958b\u6e90\u7684 AI \u5957\u4ef6\u5982\u96e8\u5f8c\u6625\u7b4d\u822c\u7684\u51fa\u73fe\u5927\u5927\u964d\u4f4e\u4e86\u6a5f\u5668\u5b78\u7fd2\u5efa\u6a21\u7684\u9580\u6abb\u3002\u5728\u4eca\u5929\u7684\u5167\u5bb9\u4e2d\u6211\u60f3\u85c9\u7531\u9435\u4eba\u8cfd\u4f86\u8ddf\u5927\u5bb6\u5206\u4eab\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b\uff0c\u4e26\u4e14\u5f9e\u8cc7\u6599\u9762\u8207\u6a21\u578b\u9762\u7684\u89d2\u5ea6\u4f86\u63a2\u8a0e\u6a5f\u5668\u5b78\u7fd2\u61c9\u8a72\u6ce8\u610f\u7684\u5e7e\u4ef6\u4e8b\u3002\u5c24\u5176\u662f\u5728\u521d\u5b78\u968e\u6bb5\uff0c\u56e0\u7f3a\u4e4f\u7d93\u9a57\u5f80\u5f80\u6703\u72af\u4e00\u4e9b\u7121\u53ef\u907f\u514d\u7684\u932f\u8aa4\u3002\u6240\u4ee5\u9019\u7bc7\u6587\u7ae0\u5c07\u9ede\u51fa\u5341\u500b\u6a5f\u5668\u5b78\u7fd2\u4e2d\u5e38\u72af\u7684\u96b1\u5f62\u932f\u8aa4\u3002 \u8cc7\u6599\u9762 \u8cc7\u6599\u6536\u96c6\u8207\u8655\u7406\u4e0d\u7576 \u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u985e\u5225\u5206\u4f48\u4e0d\u4e00\u81f4 \u6c92\u6709\u8cc7\u6599\u8996\u89ba\u5316\u7684\u7fd2\u6163 \u4f7f\u7528 LabelEncoder \u70ba\u7279\u5fb5\u7de8\u78bc \u8cc7\u6599\u8655\u7406\u4e0d\u7576\u5c0e\u81f4\u8cc7\u6599\u6d29\u6f0f \u6a21\u578b\u9762 \u50c5\u4f7f\u7528\u6e2c\u8a66\u96c6\u8a55\u4f30\u6a21\u578b\u597d\u58de \u5728\u6c92\u6709\u4ea4\u53c9\u9a57\u8b49\u7684\u60c5\u6cc1\u4e0b\u5224\u65b7\u6a21\u578b\u6027\u80fd \u5206\u985e\u554f\u984c\u50c5\u4f7f\u7528\u6e96\u78ba\u7387\u4f5c\u70ba\u8861\u91cf\u6a21\u578b\u7684\u6307\u6a19 \u8ff4\u6b78\u554f\u984c\u50c5\u4f7f\u7528 R2 \u5206\u6578\u8a55\u4f30\u6a21\u578b\u597d\u58de \u4efb\u4f55\u4e8b\u60c5\u5225\u6025\u8457\u60f3\u7528 AI 
\u89e3\u6c7a","title":"\u524d\u8a00"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#1","text":"\u6a5f\u5668\u5b78\u7fd2\u9996\u8981\u7684\u6b65\u9a5f\u662f\u5b9a\u7fa9\u554f\u984c\uff0c\u7576\u78ba\u5b9a\u76ee\u6a19\u8207\u65b9\u5411\u5f8c\u5373\u53ef\u958b\u59cb\u641c\u96c6\u8cc7\u6599\u3002\u76f8\u4fe1\u5927\u5bb6\u90fd\u77e5\u9053\u73fe\u5be6\u751f\u6d3b\u4e2d\u7684\u8cc7\u6599\u5f97\u4f86\u4e0d\u6613\uff0c\u5373\u4f7f\u5f9e\u8cc7\u6599\u5eab\u53d6\u5f97\u4e86\u9019\u4e9b\u8cc7\u6599\u5f8c\u6211\u5011\u9084\u9700\u8981\u82b1\u5927\u91cf\u7684\u6642\u9593\u9032\u884c\u8cc7\u6599\u6e05\u6d17\u3002\u6240\u8b02\u7684\u8cc7\u6599\u6e05\u6d17\u662f\u8cc7\u6599\u5eab\u7576\u4e2d\u53ef\u80fd\u6703\u6709\u7f3a\u5931\u503c\uff0c\u4f8b\u5982\uff1aNA\u3001Inf\u3001NaN\u3001NULL\u3002 NA\uff1a\u8868\u793a\u7f3a\u5931\u503c\uff0c\u662f Not Available \u7684\u7e2e\u5beb\u3002 Inf\uff1a\u8868\u793a\u7121\u7aae\u5927\uff0c\u662f Infinite \u7684\u7e2e\u5beb\u3002 NaN\uff1a\u8868\u793a\u975e\u6578\u503c\uff0c\u662f Not a Number \u7684\u7e2e\u5beb\u3002 NULL\uff1a\u8868\u793a\u7a7a\u503c\uff0c\u5373\u6c92\u6709\u5167\u5bb9\u3002 \u7576\u8cc7\u6599\u90fd\u5b8c\u6210\u4e86\u524d\u8655\u7406\u5f8c\uff0c\u5373\u53ef\u958b\u59cb\u5efa\u7acb\u6a21\u578b\u8207\u8a55\u4f30\u6a21\u578b\u3002\u4f46\u662f\u7576\u8a13\u7df4\u51fa\u4f86\u7684\u6a21\u578b\u8868\u73fe\u4e0d\u597d\u6709\u5f88\u591a\u7684\u56e0\u7d20\u3002\u5927\u5bb6\u6700\u5e38\u505a\u7684\u662f\u66ff\u63db\u6a21\u578b\u6f14\u7b97\u6cd5\uff0c\u6216\u662f\u5617\u8a66\u4e0d\u540c\u7684\u6a21\u578b\u8d85\u53c3\u6578\u53d6\u5f97\u4e00\u500b\u6700\u4f73\u7684\u7d50\u679c\u3002\u4f46\u662f\u5728\u9032\u884c\u9019\u4e9b\u505a\u4e4b\u524d\uff0c\u5efa\u8b70\u5927\u5bb6\u5148\u628a\u95dc\u6ce8\u7684\u9ede\u56de\u5230\u8cc7\u6599\u8655\u7406\u9762\u3002\u6a21\u578b\u8a13\u7df4\u4e0d\u597d\u7684\u5176\u4e2d\u4e00\u500b\u56e0\u7d20\u662f\u8cc7\u6599\u7684\u6a19\u7c64\u6536\u96c6\u4e0d\u7576\u3002Landing.ai 
\u57f7\u884c\u9577\u5433\u6069\u9054\u4e5f\u66fe\u7d93\u8aaa\u904e\u7576\u4e00\u500b\u5c0f\u8cc7\u6599\u96c6\u5b58\u5728\u8457\u932f\u8aa4\u6a19\u7c64\u6642\uff0c\u6a21\u578b\u5f88\u96e3\u7d66\u51fa\u4e00\u500b\u6b63\u78ba\u7684\u8f38\u51fa\u3002\u56e0\u70ba\u8cc7\u6599\u9593\u593e\u5e36\u4e86\u96dc\u8a0a\u5f80\u5f80\u6703\u4f7f\u5f97\u6a21\u578b\u5b58\u5728\u8457\u4e00\u4e9b\u504f\u5dee\uff0c\u5c0e\u81f4\u8a13\u7df4\u7d50\u679c\u4e0d\u7a69\u5b9a\u3002\u56e0\u6b64\u7b46\u8005\u5efa\u8b70\u6a21\u578b\u8a13\u7df4\u4e0d\u597d\u7684\u6642\u5019\uff0c\u53ef\u4ee5\u56de\u982d\u89c0\u5bdf\u8cc7\u6599\u662f\u5426\u5b58\u5728\u4e00\u4e9b\u932f\u8aa4\u3002\u800c\u4e0d\u662f\u4e00\u5473\u5730\u8abf\u6574\u6a21\u578b\u6f14\u7b97\u6cd5\u8207\u8d85\u53c3\u6578\u3002","title":"1. \u8cc7\u6599\u6536\u96c6\u8207\u8655\u7406\u4e0d\u7576"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#2","text":"\u5728\u5206\u985e\u7684\u8cc7\u6599\u4e2d\uff0c\u521d\u5b78\u8005\u5e38\u898b\u7684\u932f\u8aa4\u662f\u5fd8\u8a18\u4f7f\u7528\u5206\u5c64\u62bd\u6a23 (stratify) \u4f86\u5c0d\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u9032\u884c\u5207\u5272\u3002\u7576\u6e2c\u8a66\u96c6\u7684\u5206\u4f48\u76e1\u53ef\u80fd\u8207\u8a13\u7df4\u76f8\u540c\u60c5\u6cc1\u4e0b\uff0c\u6a21\u578b\u624d\u66f4\u6709\u53ef\u80fd\u5f97\u5230\u66f4\u6e96\u78ba\u7684\u9810\u6e2c\u3002\u7136\u800c\u5728\u5206\u985e\u7684\u554f\u984c\u4e2d\uff0c\u6211\u5011\u5fc5\u9808\u66f4\u95dc\u6ce8\u6bcf\u500b\u985e\u5225\u7684\u8cc7\u6599\u5206\u4f48\u6bd4\u4f8b\u3002\u4ee5\u4e0b\u8209\u500b\u4f8b\u5b50\uff1a\u5047\u8a2d\u6211\u5011\u6709\u4e09\u500b\u6a19\u7c64\u7684\u985e\u5225\uff0c\u800c\u9019\u4e09\u500b\u985e\u5225\u7684\u5206\u4f48\u6bd4\u4f8b\u5206\u5225\u70ba 
4:3:3\u3002\u540c\u7406\u6211\u5011\u5728\u9032\u884c\u8cc7\u6599\u5207\u5272\u7684\u6642\u5019\u5fc5\u9808\u78ba\u4fdd\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u9700\u8981\u6709\u76f8\u540c\u7684\u8cc7\u6599\u5206\u4f48\u6bd4\u4f8b\u3002 \u5927\u5bb6\u61c9\u8a72\u90fd\u4f7f\u7528\u904e Sklearn \u7684 train_test_split \u9032\u884c\u8cc7\u6599\u5207\u5272\u3002\u5728\u6b64\u65b9\u6cd5\u4e2d Sklearn \u63d0\u4f9b\u4e86\u4e00\u500b stratify \u53c3\u6578\u9054\u5230\u5206\u5c64\u96a8\u6a5f\u62bd\u6a23\u7684\u76ee\u7684\u3002\u7279\u5225\u662f\u5728\u539f\u59cb\u6578\u64da\u4e2d\u6a23\u672c\u6a19\u7c64\u5206\u4f48\u4e0d\u5747\u8861\u6642\u975e\u5e38\u6709\u7528\uff0c\u4e00\u4e9b\u5206\u985e\u554f\u984c\u53ef\u80fd\u6703\u5728\u76ee\u6a19\u985e\u7684\u5206\u4f48\u4e2d\u8868\u73fe\u51fa\u5f88\u5927\u7684\u4e0d\u5e73\u8861\uff1a\u4f8b\u5982\uff0c\u8ca0\u6a23\u672c\u8207\u6b63\u6a23\u672c\u6bd4\u4f8b\u61f8\u6b8a(\u4fe1\u7528\u5361\u76dc\u5237\u9810\u6e2c\u3001\u96e2\u8077\u54e1\u5de5\u9810\u6e2c)\u3002\u4ee5\u4e0b\u7528\u7d05\u9152\u5206\u985e\u9810\u6e2c\u4f86\u9032\u884c\u793a\u7bc4\uff0c\u9996\u5148\u6211\u5011\u4e0d\u4f7f\u7528 stratify \u96a8\u6a5f\u5207\u5272\u8cc7\u6599\u4e26\u67e5\u770b\u8cc7\u6599\u5207\u5272\u524d\u5f8c\u7684\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b\u3002 import pandas as pd from sklearn.datasets import load_wine from sklearn.model_selection import train_test_split X , y = load_wine ( return_X_y = True ) # \u67e5\u770b\u5168\u90e8\u8cc7\u6599\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y ) . value_counts ( normalize = True ) # \u5168\u90e8\u8cc7\u6599\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.398876 0 0.331461 2 0.269663 dtype: float64 # \u5be6\u9a57\u4e00: \u4e0d\u4f7f\u7528 stratify \u9032\u884c\u5207\u5272\u8cc7\u6599 X_train , X_test , y_train , y_test = train_test_split ( X , y ) # \u67e5\u770b\u8a13\u7df4\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y_train ) . 
value_counts ( normalize = True ) # \u67e5\u770b\u6e2c\u8a66\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y_test ) . value_counts ( normalize = True ) # \u8a13\u7df4\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.390977 0 0.330827 2 0.278195 dtype: float64 # \u6e2c\u8a66\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.511111 0 0.266667 2 0.222222 dtype: float64 \u5f9e\u4e0a\u9762\u5207\u51fa\u4f86\u7684\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u53ef\u4ee5\u767c\u73fe\u4e09\u500b\u985e\u5225\u7684\u8cc7\u6599\u5206\u4f48\u6bd4\u4f8b\u90fd\u4e0d\u540c\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u4f7f\u7528 stratify \u53c3\u6578\u518d\u5207\u5272\u4e00\u6b21\u3002 # \u5be6\u9a57\u4e8c: \u4f7f\u7528 stratify \u9032\u884c\u5207\u5272\u8cc7\u6599 X_train , X_test , y_train , y_test = train_test_split ( X , y , stratify = y ) # \u67e5\u770b\u8a13\u7df4\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y_train ) . value_counts ( normalize = True ) # \u67e5\u770b\u6e2c\u8a66\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b pd . Series ( y_test ) . value_counts ( normalize = True ) # \u8a13\u7df4\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.400000 0 0.333333 2 0.266667 dtype: float64 # \u6e2c\u8a66\u96c6\u4e09\u7a2e\u985e\u5225\u6bd4\u4f8b 1 0.398496 0 0.330827 2 0.270677 dtype: float64 \u6211\u5011\u53ef\u4ee5\u767c\u73fe\u5c07 stratify \u8a2d\u7f6e\u70ba\u76ee\u6a19 (y) \u5728\u8a13\u7df4\u548c\u6e2c\u8a66\u96c6\u4e2d\u7522\u751f\u76f8\u540c\u7684\u5206\u4f48\u3002\u56e0\u70ba\u6539\u8b8a\u7684\u985e\u5225\u7684\u6bd4\u4f8b\u662f\u4e00\u500b\u56b4\u91cd\u7684\u554f\u984c\uff0c\u53ef\u80fd\u6703\u4f7f\u6a21\u578b\u66f4\u504f\u5411\u65bc\u7279\u5b9a\u7684\u985e\u5225\u3002\u56e0\u6b64\u8a13\u7df4\u8cc7\u6599\u7684\u5206\u4f48\u5fc5\u9808\u8981\u8207\u5be6\u969b\u60c5\u6cc1\u8d8a\u63a5\u8fd1\u8d8a\u597d\u3002","title":"2. 
\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u985e\u5225\u5206\u4f48\u4e0d\u4e00\u81f4"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#3","text":"\u8cc7\u6599\u8996\u89ba\u5316\u7684\u597d\u8655\u591a\u591a\uff0c\u5728\u672c\u7cfb\u5217\u6587\u7ae0 [Day 3] \u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\uff1f\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427\uff01 \u8207 [Day 22] Python \u8996\u89ba\u5316\u89e3\u91cb\u6578\u64da - Plotly Express \u8b1b\u89e3\u4e86\u8a31\u591a Python \u8cc7\u6599\u8996\u89ba\u5316\u7684\u6280\u5de7\u3002\u8cc7\u6599\u8996\u89ba\u5316\u53ef\u4ee5\u5e6b\u52a9\u6211\u5011\u5206\u6790\u8207\u7d71\u8a08\u8cc7\u6599\u7684\u578b\u614b\uff0c\u5f80\u5f80\u6709\u597d\u7684\u8cc7\u6599\u6e05\u6d17\u8207\u524d\u8655\u7406\u5c0d\u6a21\u578b\u9810\u6e2c\u7d50\u679c\u6703\u6709\u5927\u5e45\u7684\u63d0\u5347\u3002\u6709\u8208\u8da3\u7684\u8b80\u8005\u53ef\u4ee5\u53c3\u8003 \u5b89\u65af\u5eab\u59c6\u56db\u91cd (Anscombe\u2019s quartet) \u3002\u4ed6\u4e3b\u8981\u662f\u900f\u904e\u56db\u500b\u5c0f\u8cc7\u6599\u96c6\u4e26\u900f\u904e\u8996\u89ba\u5316\u8207\u7d71\u8a08\u4f86\u89c0\u5bdf\uff0c\u4e26\u8aaa\u660e\u5728\u5206\u6790\u6578\u64da\u524d\u5148\u7e6a\u88fd\u5716\u8868\u7684\u91cd\u8981\u6027\uff0c\u4ee5\u53ca\u96e2\u7fa4\u503c\u5c0d\u7d71\u8a08\u7684\u5f71\u97ff\u4e4b\u5927\u3002","title":"3. 
\u6c92\u6709\u8cc7\u6599\u8996\u89ba\u5316\u7684\u7fd2\u6163"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#4-labelencoder","text":"\u901a\u5e38\u6211\u5011\u8981\u70ba\u985e\u5225\u7684\u7279\u5fb5\u9032\u884c\u7de8\u78bc\uff0c\u76f4\u89ba\u6703\u60f3\u5230 Sklearn \u7684 LabelEncoder \u3002\u4f46\u662f\u5982\u679c\u4e00\u500b\u8cc7\u6599\u96c6\u4e2d\u6709\u591a\u500b\u7279\u5fb5\u662f\u5c6c\u65bc\u985e\u5225\u578b\u7684\u8cc7\u6599\uff0c\u8c48\u4e0d\u662f\u5f88\u9ebb\u7169?\u5fc5\u9808\u8981\u4e00\u500b\u4e00\u500b\u547c\u53eb LabelEncoder \u5206\u5225\u70ba\u9019\u4e9b\u7279\u5fb5\u9032\u884c\u8f49\u63db\u3002\u5982\u679c\u4f60\u770b\u5230\u9019\u908a\u6709\u540c\u611f\u7684\uff0c\u5728\u9019\u88e1\u8981\u544a\u8a34\u4f60\u4e8b\u5be6\u4e26\u975e\u5982\u6b64\uff01\u6211\u5011\u770b\u770b \u5728\u5b98\u65b9\u6587\u4ef6\u4e0b LabelEncoder \u7684\u63cf\u8ff0\uff1a This transformer should be used to encode target values, i.e. y, and not the input X. \u7c21\u55ae\u4f86\u8aaa LabelEncoder \u53ea\u662f\u88ab\u7528\u4f86\u7de8\u78bc\u8f38\u51fa\u9805 y \u800c\u5df2\u7684\uff01\u4f60\u9084\u5728\u7528\u5b83\u4f86\u7de8\u78bc\u4f60\u7684\u6bcf\u500b x \u55ce\uff1f\uff08\u6688 \u90a3\u9ebc\u6211\u5011\u8a72\u7528\u4ec0\u9ebc\u65b9\u6cd5\u4f86\u7de8\u78bc\u6709\u9806\u5e8f\u7684\u985e\u5225\u7279\u5fb5\u5462\uff1f\u5982\u679c\u4f60\u4ed4\u7d30\u95b1\u8b80\u6709\u95dc\u7de8\u78bc\u5206\u985e\u7279\u5fb5\u7684 Sklearn \u7528\u6236\u6307\u5357\uff0c\u4f60\u6703\u770b\u5230\u5b83\u6e05\u695a\u5730\u8aaa\u660e\uff1a To convert categorical features to integer codes, we can use the OrdinalEncoder. 
This estimator transforms each categorical feature to one new feature of integers (0 to n_categories - 1) \u770b\u5230\u9019\u908a\u5927\u5bb6\u61c9\u8a72\u77e5\u9053\u95b1\u8b80\u5b98\u65b9\u6587\u4ef6\u7684\u91cd\u8981\u6027\u5427\uff01\u5b98\u65b9\u6587\u4ef6\u4e2d\u5efa\u8b70 x \u9805\u7684\u8f38\u5165\u7279\u5fb5\u53ef\u4ee5\u63a1\u7528 OrdinalEncoder \u4e00\u6b21\u70ba\u6240\u6709\u7279\u5fb5\u4f9d\u5e8f\u505a Label Encoding\u3002OrdinalEncoder \u7de8\u78bc\u5668\u7684\u4f7f\u7528\u65b9\u5f0f\u5982\u4e0b\uff1a from sklearn.preprocessing import OrdinalEncoder enc = OrdinalEncoder () X = [[ 'Male' , 1 ], [ 'Female' , 3 ], [ 'Female' , 2 ]] enc . fit ( X ) print ( enc . categories_ ) enc . transform ([[ 'Female' , 3 ], [ 'Male' , 1 ]]) [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)] array([[0., 2.], [1., 0.]]) \u4ee5\u4e0a\u7684\u7bc4\u4f8b\u662f X \u6709\u4e09\u7b46\u8cc7\u6599\uff0c\u6bcf\u7b46\u8cc7\u6599\u90fd\u6709\u5169\u500b\u7279\u5fb5\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7b2c\u4e00\u500b\u7279\u5fb5\u662f\u6027\u5225 Male \u8207 Female\uff0c\u56e0\u6b64 OrdinalEncoder \u6703\u4f9d\u7167\u5b57\u6bcd\u958b\u982d\u505a\u6392\u5e8f Female \u7de8\u78bc\u70ba 0 \u800c Male \u7de8\u78bc\u70ba 1\u3002\u53e6\u5916\u7b2c\u4e8c\u500b\u7279\u5fb5\u70ba\u6578\u5b57 1\u30012\u30013\uff0c\u540c\u7406\u4f9d\u5e8f\u70ba\u4ed6\u5011\u7de8\u78bc\u6210 0\u30011\u30012\u3002\u53ea\u9700\u95b1\u8b80\u5b98\u65b9\u6587\u6a94\u548c\u7528\u6236\u6307\u5357\uff0c\u4f60\u5c31\u53ef\u4ee5\u4e86\u89e3\u5f88\u591a\u95dc\u65bc Sklearn \u7684\u77e5\u8b58\uff01\u662f\u4e0d\u662f\u5f88\u68d2\uff5e","title":"4. 
\u4f7f\u7528 LabelEncoder \u70ba\u7279\u5fb5\u7de8\u78bc"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#5","text":"\u8cc7\u6599\u6d29\u6f0f (data leakage) \u662f\u500b\u96b1\u5f62\u6bba\u624b\uff0c\u5b83\u6703\u5728\u4e0d\u77e5\u4e0d\u89ba\u4e2d\u5f71\u97ff\u6a21\u578b\u9810\u6e2c\u7d50\u679c\u3002\u5176\u767c\u751f\u7684\u6642\u6a5f\u5728\u65bc\u4f60\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\uff0c\u4e0d\u61c9\u8a72\u5c07\u6e2c\u8a66\u7684\u8cc7\u6599\u7684\u8cc7\u8a0a\u6d29\u6f0f\u5230\u8a13\u7df4\u904e\u7a0b\u4e2d\u3002\u5b83\u6703\u9020\u6210\u6a21\u578b\u7d66\u51fa\u4e00\u500b\u975e\u5e38\u6a02\u89c0\u7684\u7d50\u679c\uff0c\u5373\u4f7f\u5728\u4ea4\u53c9\u9a57\u8b49\u4e2d\u4e5f\u662f\u5982\u6b64\uff0c\u4f46\u5728\u5c0d\u5be6\u969b\u65b0\u6578\u64da\u9032\u884c\u6e2c\u8a66\u6642\u8868\u73fe\u6703\u975e\u5e38\u5730\u7cdf\u7cd5\u3002 \u8cc7\u6599\u6d29\u6f0f\u6700\u5e38\u767c\u751f\u65bc\u8cc7\u6599\u524d\u8655\u7406\u7684\u968e\u6bb5\uff0c\u5c24\u5176\u662f\u7576\u4f60\u7684\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u5c1a\u672a\u5207\u5272\u7684\u6642\u5019\u3002Sklearn \u63d0\u4f9b\u4e86\u8a31\u591a\u8cc7\u6599\u524d\u8655\u7406\u7684\u65b9\u6cd5\uff0c\u4f8b\u5982: \u7f3a\u5931\u503c\u88dc\u503c(imputers)\u3001\u6b63\u898f\u5316 (normalizers)\u3001\u6a19\u6e96\u5316(standardization)\u4ee5\u53ca\u5c0d\u6578(log) \u8f49\u63db...\u7b49\u3002\u9019\u4e9b\u8f49\u63db\u5668\u90fd\u6703\u4f9d\u8cf4\u65bc\u4f60\u8f38\u5165\u8cc7\u6599\u7684\u5206\u4f48\uff0c\u4e26\u4f9d\u7167\u6b64\u5206\u4f48\u505a\u76f8\u5c0d\u61c9\u7684\u64ec\u5408\u3002 \u8209\u4f8b\u4f86\u8aaa\uff0c\u6211\u5011\u5728\u505a\u6a19\u6e96\u5316\u6642(StandardScaler)\u900f\u904e\u5f9e\u6bcf\u7b46\u8cc7\u6599\u4e2d\u6e1b\u53bb\u5e73\u5747\u503c\u4e26\u5c07\u5176\u9664\u4ee5\u6a19\u6e96\u504f\u5dee\u4f86\u7372\u5f97\u7e2e\u653e\u5f8c\u7684\u6578\u64da\u3002\u6211\u5011\u4f7f\u7528 fit() \u65b9\u6cd5\u5728\u6240\u6709\u8cc7\u6599\u96c6 X 
\u4e0a\u505a\u8f49\u63db\uff0c\u4e26\u4f7f\u5f97\u8f49\u63db\u5668\u5b78\u7fd2\u6bcf\u500b\u7279\u5fb5\u7684\u6574\u500b\u5206\u4f48\u7684\u5e73\u5747\u503c\u548c\u6a19\u6e96\u5dee\u3002\u9019\u4e9b\u8cc7\u6599\u8f49\u63db\u5f8c\u5982\u679c\u518d\u5c07\u9019\u4e9b\u6578\u64da\u62c6\u5206\u70ba\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\uff0c\u5247\u8a13\u7df4\u96c6\u6703\u53d7\u5230\u6c61\u67d3\u3002\u56e0\u70ba StandardScaler \u5f9e\u5be6\u969b\u5206\u4f48\u4e2d\u6d29\u9732\u4e86\u6e2c\u8a66\u96c6\u91cd\u8981\u8a0a\u606f\uff0c\u4e00\u822c\u4f86\u8aaa\u6211\u5011\u4e0d\u80fd\u5c07\u6e2c\u8a66\u96c6\u7684\u5206\u4f48\u60c5\u6cc1\u8207\u8a13\u7df4\u96c6\u6df7\u5728\u4e00\u8d77\u3002\u96d6\u7136\u6211\u5011\u5e0c\u671b\u8a13\u7df4\u96c6\u7684\u5206\u4f48\u8207\u5be6\u969b\u6e2c\u8a66\u96c6\u7684\u5206\u4f48\u8981\u8d8a\u63a5\u8fd1\u8d8a\u597d\uff0c\u56e0\u70ba\u4f7f\u5f97\u6a21\u578b\u8868\u73fe\u7d50\u679c\u7a69\u5b9a\u3002 \u96d6\u7136\u6211\u5011\u628a\u6e2c\u8a66\u96c6\u8207\u8a13\u7df4\u96c6\u6df7\u5728\u4e00\u8d77\u4e26\u505a\u8f49\u63db\uff0c\u9019\u4e00\u6b65\u9a5f\u5c0d\u6211\u5011\u4f86\u8aaa\u53ef\u80fd\u6c92\u4ec0\u9ebc\u3002\u4f46\u662f\u5c0d\u65bc Sklearn \u5f37\u5927\u7684\u6f14\u7b97\u6cd5\uff0c\u53ef\u80fd\u6703\u900f\u904e\u9019\u500b\u907a\u6f0f\u6e2c\u8a66\u96c6\u7684\u5206\u4f48\u7684\u8a0a\u606f\u628a\u6a21\u578b\u64ec\u5408\u7684\u5f88\u597d\u3002\u5c46\u6642\u6a21\u578b\u8a13\u7df4\u5b8c\u6210\u5f8c\uff0c\u6e2c\u8a66\u96c6\u4e0d\u5920\u65b0\u7a4e\uff0c\u7121\u6cd5\u5728\u5be6\u969b\u770b\u4e0d\u898b\u7684\u6578\u64da\u4e0a\u6e2c\u8a66\u6a21\u578b\u7684\u6027\u80fd\u3002 \u6700\u7c21\u55ae\u7684\u89e3\u6c7a\u8fa6\u6cd5\uff0c\u5c31\u662f\u4e0d\u8981\u4f7f\u7528 fit() 
\u4e00\u6b21\u8f49\u63db\u6240\u6709\u7684\u8cc7\u6599\u3002\u5728\u505a\u4efb\u4f55\u8cc7\u6599\u8f49\u63db\u4e4b\u524d\u8981\u5148\u78ba\u4fdd\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u5df2\u7d93\u5b8c\u6574\u5730\u88ab\u5207\u958b\u3002\u5373\u4f7f\u5207\u958b\u5f8c\u4e5f\u4e0d\u8981\u518d\u62ff\u6e2c\u8a66\u96c6\u547c\u53eb fit() \u6216 fit_transform() \uff0c\u9019\u4e00\u6a23\u6703\u5c0e\u81f4\u76f8\u540c\u554f\u984c\u767c\u751f\u3002\u56e0\u70ba\u8a13\u7df4\u96c6\u548c\u6e2c\u8a66\u96c6\u5fc5\u9808\u9032\u884c\u76f8\u540c\u7684\u8f49\u63db\uff0c\u4f9d\u7167\u5b98\u65b9\u7684\u7bc4\u4f8b\u6211\u5011\u5fc5\u9808\u5148\u4f7f\u7528 fit_transform() \u5728\u8a13\u7df4\u96c6\u4e0a\u9032\u884c\u64ec\u5408\u8207\u8f49\u63db\u3002\u9019\u78ba\u4fdd\u4e86\u8f49\u63db\u5668\u50c5\u5f9e\u8a13\u7df4\u96c6\u5b78\u7fd2\uff0c\u5f9e\u4e2d\u627e\u51fa\u53c3\u6578\u4f8b\u5982\u5e73\u5747\u503c\u8207\u8b8a\u7570\u6578\u4e26\u540c\u6642\u5c0d\u5176\u9032\u884c\u8b8a\u63db\u3002\u63a5\u8457\u4f7f\u7528 transform() \u65b9\u6cd5\u5728\u6e2c\u8a66\u8cc7\u6599\u4e0a\u9032\u884c\u8f49\u63db\uff0c\u6839\u64da\u5f9e\u8a13\u7df4\u6578\u64da\u4e2d\u5b78\u5230\u7684\u8a0a\u606f\u9032\u884c\u8f49\u63db\u3002 from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler X , y = load_iris ( return_X_y = True ) X_train , X_test , y_train , y_test = train_test_split ( X , y , stratify = y , random_state = 44 ) scaler = StandardScaler () X_train_scaled = scaler . fit_transform ( X_train ) X_test_scaled = scaler . 
transform ( X_test ) \u66f4\u5f37\u5927\u7684\u89e3\u6c7a\u65b9\u6848\u662f\u4f7f\u7528 Sklearn \u5167\u5efa\u7684 pipeline\uff0c\u5b83\u80fd\u5920\u4fdd\u8b77\u6a21\u578b\u514d\u65bc\u8cc7\u6599\u6d29\u6f0f\u7684\u554f\u984c\u3002\u6b64\u65b9\u6cd5\u80fd\u5920\u78ba\u4fdd\u8a13\u7df4\u8cc7\u6599\u50c5\u53c3\u8207\u8f49\u63db\u64ec\u5408\u8207\u6a21\u578b\u8a13\u7df4\uff0c\u800c\u6e2c\u8a66\u8cc7\u6599\u50c5\u7528\u65bc\u8a08\u7b97\u4e26\u9a57\u8b49\u6a21\u578b\u3002","title":"5. \u8cc7\u6599\u8655\u7406\u4e0d\u7576\u5c0e\u81f4\u8cc7\u6599\u6d29\u6f0f"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#6","text":"\u5982\u679c\u4f60\u7684\u6e2c\u8a66\u8cc7\u6599 R2 score \u5f97\u5230\u4e86 0.85 \u5c31\u4ee3\u8868\u5f88\u597d\u4e86\u55ce\uff1f\u4e0d\u76e1\u7136\uff01\u5118\u7ba1\u6709\u9ad8\u7684\u6e2c\u8a66\u5206\u6578\u901a\u5e38\u610f\u5473\u8457\u6a21\u578b\u8868\u73fe\u4f73\uff0c\u4f46\u5728\u89e3\u91cb\u6e2c\u8a66\u7d50\u679c\u6642\u4ecd\u6709\u4e00\u4e9b\u91cd\u8981\u7684\u6ce8\u610f\u4e8b\u9805\u3002\u9996\u5148\u6700\u91cd\u8981\u7684\uff0c\u7121\u8ad6\u5206\u6578\u503c\u5982\u4f55\u6e2c\u8a66\u96c6\u7684\u5206\u6578\u4e00\u5b9a\u8981\u8207\u8a13\u7df4\u96c6\u76f8\u6bd4\u8f03\u624d\u80fd\u78ba\u4fdd\u6a21\u578b\u8a13\u7df4\u597d\u8207\u58de\u3002\u7576\u4f60\u7684\u6a21\u578b\u8a13\u7df4\u96c6\u5206\u6578\u9ad8\u65bc\u6e2c\u8a66\u96c6\u7684\u5206\u6578\uff0c\u4e26\u4e14\u5169\u8005\u90fd\u8db3\u5920\u9ad8\u4ee5\u6eff\u8db3\u5c08\u6848\u7684\u76ee\u6a19\u671f\u671b\u6642\u9019\u4ee3\u8868\u4f60\u8a13\u7df4\u4e86\u4e00\u500b\u597d\u6a21\u578b\u3002\u7136\u800c\u9019\u4e26\u4e0d\u610f\u5473\u8457\u8a13\u7df4\u548c\u6e2c\u8a66\u5206\u6578\u4e4b\u9593\u7684\u5dee\u7570\u8d8a\u5927\u8d8a\u597d\u3002\u8209\u500b\u4f8b\u5b50\uff0c\u82e5\u8a13\u7df4\u96c6\u7684 R2 score \u70ba 0.85 \u6e2c\u8a66\u96c6\u70ba 0.8 
\u5373\u4ee3\u8868\u6a21\u578b\u65e2\u4e0d\u904e\u5ea6\u64ec\u5408(overfit)\u4e5f\u4e0d\u6b20\u64ec\u5408(underfit)\u3002\u4f46\u662f\u5982\u679c\u8a13\u7df4\u96c6 0.9 \u6e2c\u8a66\u96c6 0.7 \u7684\u6642\u5019\uff0c\u4f60\u7684\u6a21\u578b\u5c31\u662f\u904e\u64ec\u5408\u3002\u5176\u539f\u56e0\u662f\u8a72\u6a21\u578b\u6c92\u6709\u5728\u8a13\u7df4\u671f\u9593\u9032\u884c\u6cdb\u5316\uff0c\u800c\u662f\u8a18\u4f4f\u4e86\u4e00\u4e9b\u8a13\u7df4\u6578\u64da\uff0c\u5f9e\u800c\u5c0e\u81f4\u6e2c\u8a66\u5206\u6578\u4f4e\u5f97\u591a\u3002 \u5728\u5927\u591a\u6578\u4efb\u52d9\u4e2d\u4f60\u5c07\u6703\u770b\u5230\u8a31\u591a\u4eba\u4f7f\u7528 tree-based \u6a21\u578b\u6216\u662f\u6574\u9ad4\u5b78\u7fd2\u6a21\u578b (ensemble models)\u3002\u4f8b\u5982\u5728\u96a8\u6a5f\u68ee\u6797\u6f14\u7b97\u6cd5\u7576\u4e2d\u5982\u679c\u5b83\u5011\u7684\u6a39\u6df1\u5ea6\u592a\u6df1\uff0c\u5f80\u5f80\u6703\u7372\u5f97\u975e\u5e38\u9ad8\u7684\u8a13\u7df4\u5206\u6578\uff0c\u5f9e\u800c\u5c0e\u81f4\u904e\u5ea6\u64ec\u5408\u3002\u53e6\u5916\u4e5f\u6709\u6e2c\u8a66\u96c6\u7684\u5206\u6578\u6bd4\u8a13\u7df4\u96c6\u9ad8\u7684\u60c5\u6cc1\uff0c\u82e5\u767c\u751f\u6b64\u60c5\u6cc1\u6642\u901a\u5e38\u90fd\u6703\u611f\u89ba\u662f\u4e0d\u662f\u505a\u932f\u4e86\u4ec0\u9ebc\u3002\u9019\u7a2e\u60c5\u6cc1\u7684\u4e3b\u8981\u539f\u56e0\u662f\u8cc7\u6599\u6d29\u6f0f\uff0c\u4e5f\u5c31\u662f\u4e0a\u4e00\u7bc0\u6211\u5011\u8a0e\u8ad6\u7684\u60c5\u6cc1\u3002\u6216\u662f\u4f60\u7684\u6e2c\u8a66\u8cc7\u6599\u7b46\u6578\u592a\u5c11\uff0c\u6c92\u8fa6\u6cd5\u8db3\u4ee5\u9a57\u8b49\u6a21\u578b\u597d\u58de\u3002 
\u53e6\u5916\u6709\u6642\u5019\u6211\u5011\u4e5f\u6703\u5f97\u5230\u5728\u8a13\u7df4\u96c6\u6709\u5f88\u597d\u7684\u8868\u73fe\u4f46\u6e2c\u8a66\u96c6\u7121\u6575\u5dee\u7684\u60c5\u6cc1\u3002\u7576\u8a13\u7df4\u548c\u6e2c\u8a66\u5206\u6578\u5dee\u7570\u5f88\u5927\u6642\uff0c\u554f\u984c\u5f80\u5f80\u8207\u6e2c\u8a66\u96c6\u6709\u95dc\u800c\u4e0d\u662f\u904e\u5ea6\u64ec\u5408\u3002\u9019\u6642\u5019\u4f60\u53ef\u80fd\u8981\u6aa2\u67e5\u8cc7\u6599\u9810\u8655\u7406\u7684\u65b9\u5f0f\u662f\u5426\u4e00\u81f4 (\u50cf\u662f\u53d6 log \u6216 scale)\uff0c\u6216\u662f\u53ea\u662f\u5fd8\u8a18\u5c0d\u6e2c\u8a66\u96c6\u505a\u8f49\u63db\u8655\u7406\u3002 \u9019\u88e1\u505a\u4e00\u500b\u5c0f\u7d50\uff0c\u7e3d\u4e4b\u5728\u8a13\u7df4\u597d\u6a21\u578b\u6642\u8acb\u4ed4\u7d30\u6aa2\u67e5\u8a13\u7df4\u548c\u6e2c\u8a66\u5206\u6578\u4e4b\u9593\u7684\u5dee\u8ddd\u3002\u4e26\u4e14\u53ef\u4ee5\u900f\u904e\u6b64\u8a55\u4f30\u65b9\u5f0f\u6aa2\u8996\u6a21\u578b\u662f\u5426\u904e\u64ec\u5408\uff0c\u540c\u6642\u4e5f\u80fd\u9032\u884c\u6a21\u578b\u8abf\u53c3\u6216\u662f\u9078\u64c7\u6700\u4f73\u7684\u8cc7\u6599\u9810\u8655\u7406\u65b9\u5f0f\u3002\u4e26\u70ba\u6700\u7d42\u7684\u6a21\u578b\u505a\u6700\u4f73\u7684\u6e96\u5099\u3002","title":"6. 
\u50c5\u4f7f\u7528\u6e2c\u8a66\u96c6\u8a55\u4f30\u6a21\u578b\u597d\u58de"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#7","text":"\u6211\u60f3\u5927\u5bb6\u61c9\u8a72\u90fd\u719f\u7df4\u638c\u63e1\u4e86 overfitting \u9019\u500b\u8b70\u984c\u3002\u9019\u662f\u6a5f\u5668\u5b78\u7fd2\u4e2d\u4e00\u500b\u8feb\u5207\u554f\u984c\uff0c\u4e26\u5df2\u7d93\u8a2d\u8a08\u4e86\u7121\u6578\u500b\u65b9\u6cd5\u4f86\u89e3\u6c7a\u5b83\u3002\u6700\u57fa\u672c\u7684\u65b9\u6cd5\u662f\u5c07\u4e00\u90e8\u5206\u6578\u64da\u4f5c\u70ba\u6e2c\u8a66\u96c6\u4f86\u6a21\u64ec\u548c\u6e2c\u91cf\u6a21\u578b\u5728\u770b\u4e0d\u898b\u7684\u6578\u64da\u4e0a\u7684\u6027\u80fd\u3002\u4f46\u662f\u6211\u5011\u53ef\u4ee5\u8abf\u6574\u6a21\u578b\u7684\u8d85\u53c3\u6578\uff0c\u76f4\u5230\u6a21\u578b\u5728\u8a72\u7279\u5b9a\u6e2c\u8a66\u96c6\u4e0a\u9054\u5230\u6700\u9ad8\u5206\u6578\uff0c\u9019\u53c8\u610f\u5473\u8457\u67d0\u7a2e\u542b\u7fa9\u7684\u904e\u5ea6\u64ec\u5408\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u6703\u5c07\u5b8c\u6574\u6578\u64da\u7684\u53e6\u4e00\u90e8\u5206\u4f5c\u70ba \u9a57\u8b49\u96c6 \u518d\u6b21\u89e3\u6c7a\u9019\u500b\u554f\u984c\u3002\u6a21\u578b\u5c07\u5728\u8a13\u7df4\u6578\u64da\u4e0a\u9032\u884c\u8a13\u7df4\uff0c\u4e26\u5728\u9a57\u8b49\u96c6\u4e0a\u5fae\u8abf\u5176\u53c3\u6578\uff0c\u4e26\u5728\u6e2c\u8a66\u96c6\u4e0a\u9032\u884c\u6700\u7d42\u8a55\u4f30\u3002 \u4f46\u662f\u5c07\u6211\u5011\u5bf6\u8cb4\u7684\u6578\u64da\u5206\u6210\u4e09\u7d44\u610f\u5473\u8457\u6a21\u578b\u53ef\u4ee5\u5b78\u7fd2\u7684\u6578\u64da\u91cf\u66f4\u5c11\u3002\u6b64\u5916\u6a21\u578b\u7684\u6574\u9ad4\u9810\u6e2c\u6027\u80fd\u5c07\u53d6\u6c7a\u65bc\u90a3\u5c0d\u7279\u5b9a\u7684\u8a13\u7df4\u96c6\u548c\u9a57\u8b49\u96c6\u3002\u56e0\u6b64\u5728\u9032\u884c\u6a5f\u5668\u5b78\u7fd2\u6642\u6700\u5e38\u4f7f\u7528 K-Fold cross-validation \u89e3\u6c7a\u4e0a\u8ff0\u554f\u984c\u3002\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u6211\u7684\u524d\u5169\u5929\u6587\u7ae0 
[Day 25] \u4ea4\u53c9\u9a57\u8b49 Cross-Validation \u7c21\u4ecb \u4ee5\u53ca [Day 26] \u4ea4\u53c9\u9a57\u8b49 K-Fold Cross-Validation \u3002\u6839\u64da\u6211\u5011\u8a2d\u5b9a\u7684 K \u503c\uff0c\u53ef\u4ee5\u5b8c\u6574\u7684\u5c07\u6578\u64da\u88ab\u5206\u6210 K \u7d44 folds\uff0c\u5c0d\u65bc\u6bcf\u500b folds \u6bcf\u6b21\u6a21\u578b\u8a13\u7df4\u6703\u628a K-1 \u7d44\u4f5c\u70ba\u8a13\u7df4\u96c6\uff0c\u800c\u5269\u4e0b\u7684\u88ab\u6b78\u985e\u70ba\u9a57\u8b49\u96c6\u3002\u7576\u6a21\u578b\u4ea4\u53c9\u9a57\u8b49\u7d50\u675f\u5f8c\uff0c\u8a13\u7df4\u96c6\u6240\u6709\u8cc7\u6599\u6703\u88ab\u5b8c\u6574\u7684\u8a13\u7df4\u3002","title":"7. \u5728\u6c92\u6709\u4ea4\u53c9\u9a57\u8b49\u7684\u60c5\u6cc1\u4e0b\u5224\u65b7\u6a21\u578b\u6027\u80fd"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#8","text":"\u5728\u9810\u8a2d\u7684\u60c5\u6cc1\u4e0b\u6240\u6709 Sklearn \u5206\u985e\u5668\u5728\u547c\u53eb score() \u51fd\u6578\u6642\u90fd\u4f7f\u7528\u6e96\u78ba\u5ea6\u4f5c\u70ba\u8a55\u5206\u65b9\u6cd5\u3002\u7531\u65bc\u6e96\u78ba\u7387\u7684\u8a08\u7b97\u65b9\u5f0f\u7c21\u55ae\u8207\u5bb9\u6613\u7406\u89e3\uff0c\u56e0\u6b64\u7d93\u5e38\u6703\u770b\u5230\u521d\u5b78\u8005\u5ee3\u6cdb\u4f7f\u7528\u5b83\u4f86\u5224\u65b7\u5176\u6a21\u578b\u7684\u6027\u80fd\u3002\u4e0d\u5e78\u7684\u662f\u9019\u7a2e\u4e00\u822c\u6e96\u78ba\u7387\u7684\u8a55\u4f30\u65b9\u5f0f\u53ea\u5c0d\u985e\u5225\u5e73\u8861\u7684\u4e8c\u5143\u5206\u985e\u554f\u984c\u6709\u7528\u3002 \u7136\u800c\u5728\u5176\u4ed6\u7684\u72c0\u6cc1\u4e0b\u5b83\u662f\u4e00\u500b\u8aa4\u5c0e\u6027\u7684\u6307\u6a19\uff0c\u5373\u4f7f\u662f\u8868\u73fe\u6700\u5dee\u7684\u6a21\u578b\u4e5f\u53ef\u80fd\u80cc\u5f8c\u96b1\u85cf\u8457\u9ad8\u6e96\u78ba\u5ea6\u7684\u5206\u6578\u3002\u8209\u4f8b\u4f86\u8aaa\u6709\u500b\u5075\u6e2c\u5783\u573e\u90f5\u4ef6\u7684\u6a21\u578b\u5b83\u7684\u6e96\u78ba\u7387 
90%\uff0c\u4f46\u662f\u5be6\u969b\u4e0a\u5b83\u6839\u672c\u7121\u6cd5\u5075\u6e2c\u5230\u5783\u573e\u90f5\u4ef6\u3002\u9019\u662f\u70ba\u4ec0\u9ebc\uff1f\u7531\u65bc\u5783\u573e\u90f5\u4ef6\u4e26\u4e0d\u5e38\u898b\uff0c\u5206\u985e\u5668\u53ef\u4ee5\u6aa2\u6e2c\u6240\u6709\u975e\u5783\u573e\u90f5\u4ef6\uff0c\u5373\u4f7f\u5206\u985e\u5668\u5b8c\u5168\u7121\u6cd5\u9054\u5230\u5176\u76ee\u7684\u9019\u4e5f\u53ef\u4ee5\u63d0\u9ad8\u5176\u6e96\u78ba\u6027\u3002\u56e0\u70ba\u9019\u500b\u5206\u985e\u5668\u50c5\u53ef\u4ee5\u5206\u985e\u9019\u4e9b\u6b63\u5e38\u90f5\u4ef6\uff0c\u7a00\u5c11\u7684\u5783\u573e\u90f5\u4ef6\u6839\u672c\u8fa8\u8a8d\u4e0d\u51fa\u4f86\u3002 \u5c0d\u65bc\u591a\u5143\u985e\u5206\u985e\u7684\u554f\u984c\u66f4\u662f\u61c9\u8a72\u6ce8\u610f\u4f60\u7684\u6a21\u578b\u8a55\u4f30\u6307\u6a19\u3002\u5982\u679c\u9054\u5230 80% \u7684\u6e96\u78ba\u7387\uff0c\u662f\u5426\u610f\u5473\u8457\u6a21\u578b\u5728\u9810\u6e2c\u985e\u52251\u3001\u985e\u52252\u3001\u985e\u52253\u751a\u81f3\u6240\u6709\u985e\u6642\u4e00\u6a23\u6e96\u78ba\u5462\uff1f\u4e00\u822c\u7684\u6e96\u78ba\u7387\u6c38\u9060\u7121\u6cd5\u56de\u7b54\u6b64\u985e\u554f\u984c\uff0c\u4f46\u5e78\u904b\u7684\u662f\u5176\u4ed6\u5206\u985e\u6307\u6a19\u63d0\u4f9b\u4e86\u66f4\u591a\u7684\u8a0a\u606f\u6307\u6a19\u3002\u5b83\u5c31\u662f \u6df7\u6dc6\u77e9\u9663 (confusion matrix)\u3002 from sklearn.metrics import confusion_matrix y_true = [ 2 , 0 , 2 , 2 , 0 , 1 ] y_pred = [ 0 , 0 , 2 , 2 , 0 , 2 ] confusion_matrix ( y_true , y_pred ) array([[2, 0, 0], [0, 0, 1], [1, 0, 2]]) \u7d44\u6210\u6df7\u6dc6\u77e9\u9663\u7684\u56db\u500b\u5143\u7d20\u5206\u5225\u6709 
TP\u3001TN\u3001FP\u3001FN\u3002\u57fa\u672c\u4e0a\u6df7\u6dc6\u77e9\u9663\u6703\u62ff\u9019\u56db\u500b\u6307\u6a19\u505a\u53c3\u8003\uff0c\u540c\u6642\u7b97\u51fa\u4f86\u7684\u5206\u6578\u4e5f\u66f4\u80fd\u53bb\u8a55\u4f30\u4f60\u7684\u6a21\u578b\u8a13\u7df4\u7684\u7d50\u679c\u3002\u6b64\u5916\u6211\u5011\u53ef\u4ee5\u5229\u7528\u6df7\u6dc6\u77e9\u9663\u4f86\u8a08\u7b97 Precision\u3001Recall\u3001Accuracy \u7b49\u5206\u6578\u3002 TP(True Positive): \u6b63\u78ba\u9810\u6e2c\u6210\u529f\u7684\u6b63\u6a23\u672c\uff0c\u4f8b\u5982\u771f\u5be6\u7b54\u6848(Ground Truth)\u662f\u8c93\uff0c\u6210\u529f\u7684\u628a\u4e00\u5f35\u8c93\u7684\u7167\u7247\u9810\u6e2c\u6210\u8c93\uff0c\u5373\u70baTP TN(True Negative): \u6b63\u78ba\u9810\u6e2c\u6210\u529f\u7684\u8ca0\u6a23\u672c\uff0c\u6210\u529f\u7684\u628a\u4e00\u5f35\u72d7\u7684\u7167\u7247\u6a19\u793a\u6210\u4e0d\u662f\u8c93\uff0c\u5373\u70baTN FP(False Positive): \u932f\u8aa4\u9810\u6e2c\u6210\u6b63\u6a23\u672c\uff0c\u5be6\u969b\u4e0a\u70ba\u8ca0\u6a23\u672c\uff0c\u4f8b\u5982\uff1a\u932f\u8aa4\u7684\u628a\u4e00\u5f35\u72d7\u7684\u7167\u7247\u9810\u6e2c\u6210\u8c93 FN(False Negative): \u932f\u8aa4\u9810\u6e2c\u6210\u8ca0\u6a23\u672c\uff0c\u5be6\u969b\u4e0a\u70ba\u6b63\u6a23\u672c\uff0c\u4f8b\u5982\uff1a\u932f\u8aa4\u7684\u628a\u4e00\u5f35\u8c93\u7684\u7167\u7247\u9810\u6e2c\u6210\u4e0d\u662f\u8c93","title":"8. 
\u5206\u985e\u554f\u984c\u50c5\u4f7f\u7528\u6e96\u78ba\u7387\u4f5c\u70ba\u8861\u91cf\u6a21\u578b\u7684\u6307\u6a19"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#9-r2","text":"\u5728\u9810\u6e2c\u9023\u7e8c\u6027\u6578\u503c\u8f38\u51fa\u7684\u8ff4\u6b78\u6a21\u578b\u4e2d\uff0c\u5927\u5bb6\u5f80\u5f80\u6703\u76f4\u63a5\u547c\u53eb\u6a21\u578b\u63d0\u4f9b\u7684\u8a55\u4f30\u65b9\u6cd5\u76f4\u63a5\u8a08\u7b97 score \u3002\u7136\u800c\u9019\u500b\u5206\u6578\u5728\u8ff4\u6b78\u6a21\u578b\u4e2d\u662f\u8a08\u7b97 R2 \u5206\u6578\uff0c\u53c8\u7a31\u5224\u5b9a\u4fc2\u6578 (coefficient of determination)\u3002\u6240\u8b02\u7684\u5224\u5b9a\u4fc2\u6578\u662f\u8f38\u5165\u7279\u5fb5 (x) \u53bb\u89e3\u91cb\u8f38\u51fa (y) \u7684\u8b8a\u7570\u7a0b\u5ea6\u6709\u591a\u5c11\uff0c\u5176\u8a08\u7b97\u516c\u5f0f\u662f\uff1a\u8ff4\u6b78\u6a21\u578b\u7684\u8b8a\u7570\u91cf (SSR)/\u7e3d\u8b8a\u7570\u91cf (TSS) \u3002\u7528\u4ee5\u4e0b\u8b8a\u7570\u6578\u5206\u6790\u8868\uff08ANOVA table\uff09\u4f86\u8aaa TSS \u5c31\u662f\u8a08\u7b97\u7e3d\u8b8a\u7570\uff0c\u628a\u6bcf\u500b\u5be6\u969b\u7684 y \u6e1b\u53bb\u5e73\u5747\u6578\u7684\u5e73\u65b9\u52a0\u7e3d\u8d77\u4f86\u3002\u800c SSR \u5c31\u662f\u628a\u6240\u6709\u7684\u6a21\u578b\u9810\u6e2c y \u6e1b\u53bb\u5e73\u5747\u6578\u7684\u5e73\u65b9\u52a0\u7e3d\u8d77\u4f86\u3002\u5982\u679c R2 \u5206\u6578\u5f88\u9ad8\u8d8a\u63a5\u8fd1 1\uff0c\u8868\u793a\u6a21\u578b\u7684\u89e3\u91cb\u80fd\u529b\u5f88\u9ad8\u3002 \u5728\u5b78\u8853\u7814\u7a76\u4e0a\u6700\u76f4\u89ba\u7684\u89c0\u5ff5\u662f R2 \u5206\u6578\u6108\u63a5\u8fd1 1 \u8d8a\u597d\uff0c\u4e5f\u6709\u4e9b\u4eba\u900f\u904e\u4e00\u4e9b\u624b\u6bb5\u4f86\u88fd\u9020 R2 \u5206\u6578\u5f88\u9ad8\u7684\u5047\u8c61\uff0c\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u9019\u7bc7 \u6587\u7ae0 \u3002\u5176\u5be6\u53ea\u900f\u904e R2 
\u500b\u8a55\u4f30\u6307\u6a19\u5c31\u4f86\u6c7a\u5b9a\u4e00\u500b\u6a21\u578b\u7684\u597d\u58de\u662f\u4e0d\u592a\u597d\u7684\u7fd2\u6163\u3002\u66f4\u9032\u4e00\u6b65\u53ef\u4ee5\u4f7f\u7528 MSE\u3001MAE \u7b49\u6b98\u5dee\u7684\u8a55\u4f30\u503c\u6a19\u4f86\u770b\u6bcf\u7b46\u8cc7\u6599\u5be6\u969b\u503c\u8207\u9810\u6e2c\u503c\u7684\u8aa4\u5dee\u3002\u6216\u662f\u4f7f\u7528\u76f8\u5c0d\u8aa4\u5dee\u4f86\u89c0\u5bdf\u9810\u6e2c\u6a21\u578b\u7684\u53ef\u4fe1\u5ea6\u3002\u6b64\u5916\u7b46\u8005\u9084\u5efa\u8b70\u53ef\u4ee5\u8a66\u8457\u628a\u6bcf\u7b46\u8cc7\u6599\u7684\u771f\u5be6 y \u8207\u6a21\u578b\u9810\u6e2c\u7684 \u0177 \u7e6a\u88fd\u51fa\u4f86\uff0c\u82e5\u5448\u73fe\u4e00\u689d\u660e\u986f\u7684\u7531\u5de6\u4e0b\u5230\u53f3\u4e0a\u659c\u76f4\u7dda\uff0c\u5247\u8868\u793a\u6a21\u578b\u6240\u9810\u6e2c\u7684\u7d50\u679c\u8207\u771f\u5be6\u7b54\u6848\u5f88\u76f8\u8fd1\u3002","title":"9. \u8ff4\u6b78\u554f\u984c\u50c5\u4f7f\u7528 R2 \u5206\u6578\u8a55\u4f30\u6a21\u578b\u597d\u58de"},{"location":"27.\u6a5f\u5668\u5b78\u7fd2\u5e38\u72af\u932f\u7684\u5341\u4ef6\u4e8b/#10-ai","text":"\u8fd1\u5e7e\u5e74 AI \u7684\u767c\u5c55\u60f3\u5fc5\u5927\u5bb6\u6709\u76ee\u5171\u7779\uff0c\u5f9e\u5f71\u50cf\u8b58\u5225\u5230\u7269\u4ef6\u8fa8\u8b58\u7684\u6280\u8853\u6709\u8457\u91cd\u5927\u7684\u9032\u5c55\u3002\u6b64\u5916 2016 \u5e74 Google Deepmind \u5718\u968a\u7684 AlphaGo \u9996\u5ea6\u6253\u6557\u4eba\u985e\uff0c\u9019\u4e5f\u5728\u4eba\u6a5f\u5c0d\u5f08\u4e0a\u958b\u555f\u4e86\u4e00\u9805\u91cd\u8981\u7684\u91cc\u7a0b\u7891\u3002\u751a\u81f3\u5728\u81ea\u7136\u8a9e\u8a00\u65b9\u9762\uff0c\u6b78\u529f\u65bc\u65b0\u7684\u6a21\u578b\u67b6\u69cb\u8207\u786c\u9ad4\u8cc7\u6e90\u7684\u9032\u6b65\uff0c\u4f7f\u5f97\u81ea\u7136\u8a9e\u8a00\u6709\u91cd\u5927\u7684\u7a81\u7834\u3002\u770b\u5230\u9019\u9ebc\u591a AI \u7684\u7f8e\u597d\u8b93\u5927\u5bb6\u518d\u6b21\u5c0d\u6df1\u5ea6\u5b78\u7fd2\u9ede\u71c3\u5e0c\u671b\uff01\u53ea\u4e0d\u904e AI 
\u4e26\u975e\u842c\u80fd\uff0c\u5207\u8a18\uff01\u6240\u6709\u7684\u554f\u984c\u4e26\u4e0d\u662f\u5c07\u8cc7\u6599\u6536\u96c6\u597d\uff0c\u4e26\u5c07\u8cc7\u6599\u4e1f\u7d66\u96fb\u8166\u5b78\u7fd2\u5c31\u6703\u5f97\u5230\u4f60\u60f3\u8981\u7684\u7d50\u679c\u3002\u5927\u5bb6\u4e5f\u8a31\u6703\u9677\u5165\u300c\u70ba AI \u800c AI\u300d \u7684\u8ff7\u601d\uff0c\u5f88\u591a\u7684\u4efb\u52d9\u5176\u5be6\u900f\u904e\u5177\u6709\u898f\u5247\u7684\u5c08\u5bb6\u7cfb\u7d71\u6216\u662f\u50b3\u7d71\u6f14\u7b97\u6cd5\u5c31\u53ef\u4ee5\u9054\u5230\u5f88\u4e0d\u932f\u7684\u7d50\u679c\u3002\u518d\u8005\u6211\u5011\u90fd\u5c0d AI \u7684\u6280\u8853\u611f\u5230\u7279\u5225\u6b61\u559c\u8207\u671f\u5f85\uff0c\u4f46\u662f AI \u7684\u9ed1\u76d2\u5b50\u4eba\u985e\u5f80\u5f80\u4e0d\u77e5\u9053\u6a21\u578b\u4e0b\u4e00\u6b65\u6703\u7522\u751f\u4ec0\u9ebc\u4e0d\u53ef\u9810\u671f\u7684\u7d50\u679c\u3002\u5176\u5be6 AI \u6709\u5f88\u591a\u7684\u9650\u5236\u8207\u6311\u6230\uff0c\u9664\u4e86\u5efa\u7acb\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u4ee5\u5916\uff0c\u6211\u5011\u66f4\u9700\u8981\u95dc\u6ce8\u7684\u662f\u6a21\u578b\u5728\u60f3\u4ec0\u9ebc\u3002\u53ef\u89e3\u91cb\u4eba\u5de5\u667a\u6167\u5fc5\u7136\u662f\u6211\u5011\u8981\u63a2\u8a0e\u7684\u4e00\u6bb5\u8ab2\u984c\u3002AI \u8207\u6a5f\u5668\u4eba\u7684\u51fa\u73fe\u4e26\u4e0d\u662f\u8981\u53d6\u4ee3\u4eba\u985e\uff0c\u6211\u8a8d\u70ba AI \u6bd4\u8f03\u9069\u5408\u626e\u6f14\u8f14\u52a9\u4eba\u985e\u7684\u91cd\u8981\u89d2\u8272\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"10. 
\u4efb\u4f55\u4e8b\u60c5\u5225\u6025\u8457\u60f3\u7528 AI \u89e3\u6c7a"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/","text":"[Day 28] \u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u4f7f\u7528 pickle + gzip \u5132\u5b58\u6a21\u578b \u5c07\u8a13\u7df4\u597d\u7684\u6a21\u578b\u6253\u5305\u4e26\u5132\u5b58 \u8f09\u5165\u5132\u5b58\u7684\u6a21\u578b \u8b80\u53d6\u6253\u5305\u597d\u7684\u6a21\u578b\u4e26\u9810\u6e2c \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 \u4eca\u5929\u7684\u6559\u5b78\u5167\u5bb9\u8981\u6559\u5404\u4f4d\u5982\u4f55\u5c07\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5132\u5b58\uff0c\u4e26\u63d0\u4f9b\u4e0b\u4e00\u6b21\u8f09\u5165\u6a21\u578b\u548c\u9810\u6e2c\u3002\u5728\u672c\u7cfb\u5217\u7684\u6559\u5b78\u4e2d\u4ecb\u7d39\u4e86\u8a31\u591a Sklearn \u7684\u6a21\u578b\u6f14\u7b97\u6cd5\u3002\u7576\u6a21\u578b\u8a13\u7df4\u597d\u4e86\uff0c\u53ef\u4ee5\u5c07\u8a13\u7df4\u7d50\u679c\u5132\u5b58\u8d77\u4f86\uff0c\u4e26\u5efa\u7acb\u4e00\u500b API \u63a5\u53e3\u63d0\u4f9b\u6a21\u578b\u9810\u6e2c\u3002 \u6a21\u578b\u5132\u5b58\u65b9\u6cd5 \u5e38\u898b\u7684\u5132\u5b58\u6a21\u578b\u7684\u5957\u4ef6\u6709 pickle \u8207 joblib \u3002\u5176\u4e2d\u5728 [Day 20] \u6a5f\u5668\u5b78\u7fd2\u91d1\u624b\u6307 - Auto-sklearn \u6700\u5f8c\u6709\u4f7f\u7528 joblib \u4f86\u5132\u5b58\u6a21\u578b\uff0c\u64cd\u4f5c\u65b9\u6cd5\u4e5f\u975e\u5e38\u7c21\u55ae\u3002\u7136\u800c\u5728\u4eca\u5929\u7684\u6559\u5b78\u4e2d\u5247\u4f7f\u7528\u53e6\u4e00\u7a2e\u65b9\u6cd5 pickle \u4f86\u5132\u5b58\u6a21\u578b\u3002\u7531\u65bc pickle \u5132\u5b58\u6a21\u578b\u5f8c\u5bb9\u91cf\u53ef\u80fd\u6703\u6709\u597d\u5e7e\u767e MB \u56e0\u6b64\u5efa\u8b70\u53ef\u4ee5\u900f\u904e gzip \u4f86\u58d3\u7e2e\u6a21\u578b\u4e26\u5132\u5b58\u3002\u53e6\u5916\u5728 Python \u5b98\u65b9\u6587\u4ef6\u4e2d\u6709\u8b66\u544a\u7d55\u5c0d\u4e0d\u8981\u5229\u7528 pickle \u4f86 unpickle 
\u4f86\u8def\u4e0d\u660e\u7684\u6a94\u6848\u3002\u56e0\u70ba\u900f\u904e pickle \u6253\u5305\u6a21\u578b\u6703\u6709\u5b89\u5168\u6027\u7591\u616e\uff0c\u5305\u62ec arbitrary code execution \u7684\u554f\u984c\uff0c\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u9019\u7bc7 \u6587\u7ae0 \u3002\u5982\u679c\u8981\u8ffd\u6c42\u57f7\u884c\u901f\u5ea6\u8207\u5b89\u5168\u6027\uff0c\u5efa\u8b70\u53ef\u4ee5\u63a1\u7528 JSON \u683c\u5f0f\u4f86\u5b58\u53d6\u6a21\u578b\u7684\u53c3\u6578\u8207\u8a2d\u5b9a\u3002 \u5f8c\u8a18\uff1a\u9019\u5e7e\u5e74ONNX\u6a21\u578b\u901a\u7528\u683c\u5f0f\u4e5f\u975e\u5e38\u6d41\u884c\uff0c\u9664\u4e86\u795e\u7d93\u7db2\u8def\u4e4b\u5916\u4e5f\u652f\u63f4sklearn\u7684\u6a21\u578b\u5132\u5b58\u3002\u5927\u5bb6\u4e0d\u59a8\u4e5f\u53ef\u4ee5\u8a66\u8a66\u770b\uff01 1) \u8f09\u5165\u8cc7\u6599\u96c6 \u4eca\u65e5\u7684\u7bc4\u4f8b\u9084\u662f\u62ff\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u9032\u884c\u793a\u7bc4\u3002\u9996\u5148\u6211\u5011\u5148\u8f09\u5165\u8cc7\u6599\u96c6\u4e26\u9032\u884c\u8cc7\u6599\u7684\u5207\u5272\u3002 import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris iris = load_iris () df_data = pd . DataFrame ( data = np . c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data 2) \u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6 from sklearn.model_selection import train_test_split X = df_data . drop ( labels = [ 'Species' ], axis = 1 ) . values # \u79fb\u9664Species\u4e26\u53d6\u5f97\u5269\u4e0b\u6b04\u4f4d\u8cc7\u6599 y = df_data [ 'Species' ] . values X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.3 , random_state = 42 , stratify = y ) print ( 'train shape:' , X_train . shape ) print ( 'test shape:' , X_test . 
shape ) \u8a13\u7df4\u6a21\u578b - XGBoost XGBoost \u6a21\u578b\u662f\u76ee\u524d\u6700\u71b1\u9580\u7684\u6f14\u7b97\u6cd5\u6a21\u578b\u4e4b\u4e00\uff0c\u8a73\u7d30\u7684\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003 [Day 15] \u6a5f\u5668\u5b78\u7fd2\u5e38\u52dd\u8ecd - XGBoost \u3002\u88e1\u9762\u6703\u6709\u4ecb\u7d39\u8a73\u7d30\u7684\u6a21\u578b\u8aaa\u660e\u8207\u624b\u628a\u624b\u5be6\u4f5c\u3002\u7576\u7136\u5927\u5bb6\u4e5f\u53ef\u4ee5\u8a66\u8457\u7528\u5176\u4ed6 Sklearn \u7684\u6a21\u578b\u8a13\u7df4\u770b\u770b\uff0c\u4e00\u6a23\u53ef\u4ee5\u900f\u904e pickle \u4f86\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b\u3002 from xgboost import XGBClassifier # \u5efa\u7acb XGBClassifier \u6a21\u578b xgboostModel = XGBClassifier ( n_estimators = 100 , learning_rate = 0.3 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b xgboostModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = xgboostModel . predict ( X_train ) \u5132\u5b58 XGboost \u6a21\u578b \u5927\u5bb6\u53ef\u4ee5\u89c0\u5bdf .pickle \u8207 .gzip \u5169\u7a2e\u4e0d\u540c\u526f\u6a94\u540d\u5132\u5b58\u7d50\u679c\u6a94\u6848\u5927\u5c0f\u6709\u4f55\u5dee\u5225? 1. \u4f7f\u7528 pickle \u5132\u5b58\u6a21\u578b import pickle with open ( './model/xgboost-iris.pickle' , 'wb' ) as f : pickle . dump ( xgboostModel , f ) 2. \u4f7f\u7528 pickle \u5132\u5b58\u6a21\u578b\u4e26\u5229\u7528 gzip \u58d3\u7e2e import pickle import gzip with gzip . GzipFile ( './model/xgboost-iris.pgz' , 'w' ) as f : pickle . dump ( xgboostModel , f ) \u8f09\u5165 XGboost \u6a21\u578b \u8a66\u8457\u8f09\u5165\u5169\u7a2e\u4e0d\u540c\u683c\u5f0f\u7684\u6a21\u578b\uff0c\u4e26\u9810\u6e2c\u4e00\u7b46\u8cc7\u6599\u3002\u6ce8\u610f\u6a21\u578b\u9810\u6e2c\u8f38\u5165\u5fc5\u9808\u70ba numpy \u578b\u614b\uff0c\u4e14\u9808\u70ba\u4e8c\u7dad\u9663\u5217\u683c\u5f0f\u3002 1. \u8f09\u5165 gzip \u683c\u5f0f\u6a21\u578b import pickle import gzip #\u8b80\u53d6Model with gzip . 
open ( './model/xgboost-iris.pgz' , 'r' ) as f : xgboostModel = pickle . load ( f ) pred = xgboostModel . predict ( np . array ([[ 5.5 , 2.4 , 3.7 , 1. ]])) print ( pred ) 2. \u8f09\u5165 pickle \u683c\u5f0f\u6a21\u578b #\u8b80\u53d6Model with open ( './model/xgboost-iris.pickle' , 'rb' ) as f : xgboostModel = pickle . load ( f ) pred = xgboostModel . predict ( np . array ([[ 5.5 , 2.4 , 3.7 , 1. ]])) print ( pred ) Reference How to save and load your Scikit-learn models in a minute Don't Pickle Your Data \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 28] \u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#day-28","text":"","title":"[Day 28] \u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#_1","text":"\u4f7f\u7528 pickle + gzip \u5132\u5b58\u6a21\u578b \u5c07\u8a13\u7df4\u597d\u7684\u6a21\u578b\u6253\u5305\u4e26\u5132\u5b58 \u8f09\u5165\u5132\u5b58\u7684\u6a21\u578b \u8b80\u53d6\u6253\u5305\u597d\u7684\u6a21\u578b\u4e26\u9810\u6e2c \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#_2","text":"\u4eca\u5929\u7684\u6559\u5b78\u5167\u5bb9\u8981\u6559\u5404\u4f4d\u5982\u4f55\u5c07\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5132\u5b58\uff0c\u4e26\u63d0\u4f9b\u4e0b\u4e00\u6b21\u8f09\u5165\u6a21\u578b\u548c\u9810\u6e2c\u3002\u5728\u672c\u7cfb\u5217\u7684\u6559\u5b78\u4e2d\u4ecb\u7d39\u4e86\u8a31\u591a Sklearn \u7684\u6a21\u578b\u6f14\u7b97\u6cd5\u3002\u7576\u6a21\u578b\u8a13\u7df4\u597d\u4e86\uff0c\u53ef\u4ee5\u5c07\u8a13\u7df4\u7d50\u679c\u5132\u5b58\u8d77\u4f86\uff0c\u4e26\u5efa\u7acb\u4e00\u500b API 
\u63a5\u53e3\u63d0\u4f9b\u6a21\u578b\u9810\u6e2c\u3002","title":"\u524d\u8a00"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#_3","text":"\u5e38\u898b\u7684\u5132\u5b58\u6a21\u578b\u7684\u5957\u4ef6\u6709 pickle \u8207 joblib \u3002\u5176\u4e2d\u5728 [Day 20] \u6a5f\u5668\u5b78\u7fd2\u91d1\u624b\u6307 - Auto-sklearn \u6700\u5f8c\u6709\u4f7f\u7528 joblib \u4f86\u5132\u5b58\u6a21\u578b\uff0c\u64cd\u4f5c\u65b9\u6cd5\u4e5f\u975e\u5e38\u7c21\u55ae\u3002\u7136\u800c\u5728\u4eca\u5929\u7684\u6559\u5b78\u4e2d\u5247\u4f7f\u7528\u53e6\u4e00\u7a2e\u65b9\u6cd5 pickle \u4f86\u5132\u5b58\u6a21\u578b\u3002\u7531\u65bc pickle \u5132\u5b58\u6a21\u578b\u5f8c\u5bb9\u91cf\u53ef\u80fd\u6703\u6709\u597d\u5e7e\u767e MB \u56e0\u6b64\u5efa\u8b70\u53ef\u4ee5\u900f\u904e gzip \u4f86\u58d3\u7e2e\u6a21\u578b\u4e26\u5132\u5b58\u3002\u53e6\u5916\u5728 Python \u5b98\u65b9\u6587\u4ef6\u4e2d\u6709\u8b66\u544a\u7d55\u5c0d\u4e0d\u8981\u5229\u7528 pickle \u4f86 unpickle \u4f86\u8def\u4e0d\u660e\u7684\u6a94\u6848\u3002\u56e0\u70ba\u900f\u904e pickle \u6253\u5305\u6a21\u578b\u6703\u6709\u5b89\u5168\u6027\u7591\u616e\uff0c\u5305\u62ec arbitrary code execution \u7684\u554f\u984c\uff0c\u8a73\u7d30\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003\u9019\u7bc7 \u6587\u7ae0 \u3002\u5982\u679c\u8981\u8ffd\u6c42\u57f7\u884c\u901f\u5ea6\u8207\u5b89\u5168\u6027\uff0c\u5efa\u8b70\u53ef\u4ee5\u63a1\u7528 JSON \u683c\u5f0f\u4f86\u5b58\u53d6\u6a21\u578b\u7684\u53c3\u6578\u8207\u8a2d\u5b9a\u3002 
\u5f8c\u8a18\uff1a\u9019\u5e7e\u5e74ONNX\u6a21\u578b\u901a\u7528\u683c\u5f0f\u4e5f\u975e\u5e38\u6d41\u884c\uff0c\u9664\u4e86\u795e\u7d93\u7db2\u8def\u4e4b\u5916\u4e5f\u652f\u63f4sklearn\u7684\u6a21\u578b\u5132\u5b58\u3002\u5927\u5bb6\u4e0d\u59a8\u4e5f\u53ef\u4ee5\u8a66\u8a66\u770b\uff01","title":"\u6a21\u578b\u5132\u5b58\u65b9\u6cd5"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#1","text":"\u4eca\u65e5\u7684\u7bc4\u4f8b\u9084\u662f\u62ff\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u9032\u884c\u793a\u7bc4\u3002\u9996\u5148\u6211\u5011\u5148\u8f09\u5165\u8cc7\u6599\u96c6\u4e26\u9032\u884c\u8cc7\u6599\u7684\u5207\u5272\u3002 import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris iris = load_iris () df_data = pd . DataFrame ( data = np . c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data","title":"1) \u8f09\u5165\u8cc7\u6599\u96c6"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#2","text":"from sklearn.model_selection import train_test_split X = df_data . drop ( labels = [ 'Species' ], axis = 1 ) . values # \u79fb\u9664Species\u4e26\u53d6\u5f97\u5269\u4e0b\u6b04\u4f4d\u8cc7\u6599 y = df_data [ 'Species' ] . values X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.3 , random_state = 42 , stratify = y ) print ( 'train shape:' , X_train . shape ) print ( 'test shape:' , X_test . 
shape )","title":"2) \u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#-xgboost","text":"XGBoost \u6a21\u578b\u662f\u76ee\u524d\u6700\u71b1\u9580\u7684\u6f14\u7b97\u6cd5\u6a21\u578b\u4e4b\u4e00\uff0c\u8a73\u7d30\u7684\u5167\u5bb9\u53ef\u4ee5\u53c3\u8003 [Day 15] \u6a5f\u5668\u5b78\u7fd2\u5e38\u52dd\u8ecd - XGBoost \u3002\u88e1\u9762\u6703\u6709\u4ecb\u7d39\u8a73\u7d30\u7684\u6a21\u578b\u8aaa\u660e\u8207\u624b\u628a\u624b\u5be6\u4f5c\u3002\u7576\u7136\u5927\u5bb6\u4e5f\u53ef\u4ee5\u8a66\u8457\u7528\u5176\u4ed6 Sklearn \u7684\u6a21\u578b\u8a13\u7df4\u770b\u770b\uff0c\u4e00\u6a23\u53ef\u4ee5\u900f\u904e pickle \u4f86\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b\u3002 from xgboost import XGBClassifier # \u5efa\u7acb XGBClassifier \u6a21\u578b xgboostModel = XGBClassifier ( n_estimators = 100 , learning_rate = 0.3 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b xgboostModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = xgboostModel . predict ( X_train )","title":"\u8a13\u7df4\u6a21\u578b - XGBoost"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#xgboost","text":"\u5927\u5bb6\u53ef\u4ee5\u89c0\u5bdf .pickle \u8207 .gzip \u5169\u7a2e\u4e0d\u540c\u526f\u6a94\u540d\u5132\u5b58\u7d50\u679c\u6a94\u6848\u5927\u5c0f\u6709\u4f55\u5dee\u5225?","title":"\u5132\u5b58 XGboost \u6a21\u578b"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#1-pickle","text":"import pickle with open ( './model/xgboost-iris.pickle' , 'wb' ) as f : pickle . dump ( xgboostModel , f )","title":"1. \u4f7f\u7528 pickle \u5132\u5b58\u6a21\u578b"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#2-pickle-gzip","text":"import pickle import gzip with gzip . GzipFile ( './model/xgboost-iris.pgz' , 'w' ) as f : pickle . dump ( xgboostModel , f )","title":"2. 
\u4f7f\u7528 pickle \u5132\u5b58\u6a21\u578b\u4e26\u5229\u7528 gzip \u58d3\u7e2e"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#xgboost_1","text":"\u8a66\u8457\u8f09\u5165\u5169\u7a2e\u4e0d\u540c\u683c\u5f0f\u7684\u6a21\u578b\uff0c\u4e26\u9810\u6e2c\u4e00\u7b46\u8cc7\u6599\u3002\u6ce8\u610f\u6a21\u578b\u9810\u6e2c\u8f38\u5165\u5fc5\u9808\u70ba numpy \u578b\u614b\uff0c\u4e14\u9808\u70ba\u4e8c\u7dad\u9663\u5217\u683c\u5f0f\u3002","title":"\u8f09\u5165 XGboost \u6a21\u578b"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#1-gzip","text":"import pickle import gzip #\u8b80\u53d6Model with gzip . open ( './model/xgboost-iris.pgz' , 'r' ) as f : xgboostModel = pickle . load ( f ) pred = xgboostModel . predict ( np . array ([[ 5.5 , 2.4 , 3.7 , 1. ]])) print ( pred )","title":"1. \u8f09\u5165 gzip \u683c\u5f0f\u6a21\u578b"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#2-pickle","text":"#\u8b80\u53d6Model with open ( './model/xgboost-iris.pickle' , 'rb' ) as f : xgboostModel = pickle . load ( f ) pred = xgboostModel . predict ( np . array ([[ 5.5 , 2.4 , 3.7 , 1. ]])) print ( pred )","title":"2. 
\u8f09\u5165 pickle \u683c\u5f0f\u6a21\u578b"},{"location":"28.\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b/#reference","text":"How to save and load your Scikit-learn models in a minute Don't Pickle Your Data \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/","text":"[Day 29] \u4f7f\u7528 Python Flask \u67b6\u8a2d API \u5427\uff01 \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 API \u89c0\u5ff5\u8b1b\u89e3 \u4ec0\u9ebc\u662f API\uff1f RESTful API HTTP Request \u65b9\u6cd5 \u624b\u628a\u624b\u5be6\u4f5c\u4e00\u500b\u82b1\u6735\u5206\u985e\u5668 API \u900f\u904e Python Flask \u5efa\u7f6e\u4e00\u500b\u5f8c\u7aef\u9810\u6e2c\u6a21\u578b API \u7bc4\u4f8b\u7a0b\u5f0f\uff1a Code \u524d\u8a00 \u7576\u6a21\u578b\u8a13\u7df4\u5b8c\u4ee5\u5f8c\u4e0b\u4e00\u500b\u6b65\u9a5f\u5c31\u662f\u61c9\u7528\u8207\u843d\u5730\u3002\u6211\u5011\u53ef\u4ee5\u8a2d\u8a08\u4e00\u500b\u5d4c\u5165\u5f0f\u7cfb\u7d71\u8207\u4f7f\u7528\u8005\u4e92\u52d5\uff0c\u4f8b\u5982\u6a39\u8393\u6d3e\u3001Jetson Nano\u3001NeuroPilot...\u7b49\u786c\u9ad4\u4f86\u5354\u52a9 AI \u6a21\u578b\u7684\u908a\u7de3\u904b\u7b97\u3002\u6216\u662f\u8a2d\u8a08\u4e00\u500b\u624b\u6a5f APP \u4ee5\u53ca\u7db2\u9801\u61c9\u7528\u3002\u5f88\u591a\u4eba\u53ef\u80fd\u6703\u6709\u7591\u554f\u6a21\u578b\u8a13\u7df4\u597d\u7136\u5f8c\u4e0b\u4e00\u500b\u6b65\u9a5f\u8a72\u600e\u505a\uff1f\u6700\u5e38\u898b\u7684\u505a\u6cd5\u5c31\u662f\u5c07\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5132\u5b58\u8d77\u4f86\u4e26\u5efa\u7acb\u4e00\u500b API \u90e8\u7f72\u5728\u5f8c\u7aef\u4f3a\u670d\u5668\u4e2d\uff0c\u63a5\u8457\u4efb\u4f55\u7684\u7d42\u7aef\u8a2d\u5099\u90fd\u53ef\u4ee5\u900f\u904e\u9019\u4e00\u500b API 
\u9032\u884c\u8cc7\u6599\u7684\u5b58\u53d6\u8207\u6a21\u578b\u9810\u6e2c\u3002\u4e0b\u5716\u662f\u4e00\u500b\u7c21\u55ae\u7684\u6a21\u578b\u843d\u5730\u7684\u61c9\u7528\u60c5\u5883\uff0c\u6211\u5011\u53ef\u4ee5\u5728\u5f8c\u7aef\u4f3a\u670d\u5668\u90e8\u7f72\u6a21\u578b\u4e26\u5efa\u7acb\u4e00\u500b API \u7684\u63a5\u53e3\u8207\u524d\u7aef\u4f7f\u7528\u8005\u4e92\u52d5\u3002\u524d\u7aef\u7db2\u9801\u7684\u4f7f\u7528\u8005\u900f\u904e HTTP \u7684\u5354\u5b9a\u8207\u5f8c\u7aef\u4f3a\u670d\u5668\u9032\u884c\u901a\u8a0a\u8207\u8cc7\u6599\u4ea4\u63db\uff0c\u6700\u7d42\u6a21\u578b\u7684\u9810\u6e2c\u7d50\u679c\u6703\u56de\u50b3\u5230\u524d\u7aef\u4f7f\u7528\u8005\u4e26\u5c07\u7d50\u679c\u9078\u67d3\u5728\u7db2\u9801\u4e0a\u3002\u6211\u5011\u5ef6\u7e8c\u6628\u5929\u7684\u5167\u5bb9 [Day 28] \u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b \uff0c\u76ee\u524d\u5df2\u7d93\u6210\u529f\u7684\u8f38\u51fa\u6a21\u578b\u3002\u4eca\u5929\u5c31\u4f86\u6559\u5404\u4f4d\u5982\u4f55\u900f\u904e Python Flask \u67b6\u8a2d\u4e00\u500b\u9cf6\u5c3e\u82b1\u6735\u5206\u985e\u5668\u7684 API \u5427\uff01 \u4ec0\u9ebc\u662f API\uff1f \u6240\u8b02\u7684 API \u4e2d\u6587\u5168\u540d\u70ba\u61c9\u7528\u7a0b\u5f0f\u4ecb\u9762 (Application Programming Interface) \u662f\u5c6c\u65bc\u5ba2\u6236\u7aef\u8207\u4f3a\u670d\u7aef\u7684\u6e9d\u901a\u6a4b\u6a11\uff0c\u5b83\u63d0\u4f9b\u4e00\u500b\u7aef\u53e3\u80fd\u5920\u9032\u884c\u8cc7\u6599\u4ea4\u63db\u3002\u7c21\u55ae\u4f86\u8aaa\u662f\u4e00\u500b\u524d\u7aef\u8207\u5f8c\u7aef\u7684\u4e00\u500b\u6e9d\u901a\u4ecb\u9762\u3002 \u53e6\u5916\u5927\u5bb6\u53ef\u80fd\u807d\u904e\u4e00\u500b\u540d\u8a5e\u53eb\u505a RESTful API\u3002\u6240\u8b02\u7684 REST \u70ba Representational State Transfer \u7684\u7e2e\u5beb\u662f\u4e00\u7a2e\u7db2\u8def\u67b6\u69cb\u98a8\u683c\uff0c\u8fd1\u5e7e\u5e74\u4f86 REST \u7684\u6982\u5ff5\u5df2\u7d93\u88ab\u5be6\u4f5c\u5728\u5927\u578b\u7db2\u8def\u7cfb\u7d71\u4e2d\uff0c\u800c\u5728 Web \u670d\u52d9\u4e2d\u4f7f\u7528 REST 
\u6982\u5ff5\u88ab\u5be6\u4f5c\u51fa\u4f86\u7684 API \u5c31\u7c21\u7a31\u70ba RESTful API \u4ed6\u662f\u4f7f\u7528 HTTP \u7684\u5354\u5b9a\u5b8c\u6574\u5b9a\u7fa9 Web \u670d\u52d9\u5728 HTTP Request \u7684\u5404\u7a2e\u6d41\u7a0b\u3002 HTTP Request \u65b9\u6cd5 \u900f\u904e\u7db2\u8def\u5354\u5b9a HTTP Request \u4e0d\u540c\u7684\u65b9\u6cd5\uff0c\u53ef\u4ee5\u5be6\u73fe\u4e0d\u540c\u7684\u8cc7\u6599\u4ea4\u63db\u8acb\u6c42\u65b9\u5f0f\u3002HTTP \u672c\u8eab\u5c31\u662f REST \u7684\u5be6\u4f5c\uff0c\u6240\u8b02\u7684 HTTP Request \u5b9a\u7fa9\u4e86\u516b\u7a2e\u8acb\u6c42\u65b9\u6cd5\u5206\u5225\u70ba\uff1a GET \uff1a\u6b64\u65b9\u6cd5\u53ea\u80fd\u5411\u6307\u5b9a\u7684\u8cc7\u6e90\u8981\u6c42\u53d6\u5f97\u8cc7\u6599\uff0c\u4e26\u4e0d\u6703\u66f4\u52d5\u5230\u5167\u90e8\u8cc7\u6e90\u3002 HEAD\uff1aHEAD \u8ddf GET \u65b9\u6cd5\u985e\u4f3c\u53ea\u5dee\u5225\u5728\u5b83\u4e26\u4e0d\u6703\u56de\u50b3\u4f60\u6240\u8acb\u6c42\u7684\u8cc7\u6e90\u5728 body \u4e0a\uff0c\u53ea\u56de\u50b3 HTTP header\u3002 POST \uff1a\u5411\u6307\u5b9a\u7684\u8cc7\u6e90\u63d0\u4ea4\u8cc7\u6599\u3002 PUT \uff1a\u5411\u6307\u5b9a\u8cc7\u6e90\u4f4d\u7f6e\u63d0\u4ea4\u66f4\u65b0\u5167\u5bb9\u3002 DELETE \uff1a\u5411\u6307\u5b9a\u8cc7\u6e90\u4f4d\u7f6e\u8acb\u6c42\u522a\u9664\u5167\u5bb9\u3002 CONNECT\uff1aHTTP/1.1\u5354\u8b70\u4e2d\u9810\u7559\u7d66\u80fd\u5920\u5c07\u9023\u63a5\u6539\u70ba\u7ba1\u9053\u65b9\u5f0f\u7684\u4ee3\u7406\u670d\u52d9\u5668\u3002 OPTIONS\uff1a\u6b64\u65b9\u6cd5\u53ef\u4f7f\u670d\u52d9\u5668\u50b3\u56de\u8a72\u8cc7\u6e90\u6240\u652f\u6301\u7684\u6240\u6709 HTTP \u8acb\u6c42\u65b9\u6cd5\u3002 TRACE\uff1a\u56de\u986f\u670d\u52d9\u5668\u6536\u5230\u7684\u8acb\u6c42\uff0c\u4e3b\u8981\u7528\u65bc\u6e2c\u8a66\u6216\u8a3a\u65b7\u3002 [\u7a0b\u5f0f\u5be6\u4f5c] \u9cf6\u5c3e\u82b1\u6735\u5206\u985e\u5668 API \u5efa\u7acb Python Flask API Flask \u662f\u4e00\u500b\u4f7f\u7528 Python \u8a9e\u8a00\u7de8\u5beb\u7684\u8f15\u91cf\u7d1a Web 
\u61c9\u7528\u6846\u67b6\u3002\u5728\u4eca\u65e5\u7684\u6587\u7ae0\u4e2d\u6211\u5011\u5c07\u5ef6\u7e8c\u6628\u5929\u6240\u5132\u5b58\u7684\u9cf6\u5c3e\u82b1\u6735\u5206\u985e\u5668\u6a21\u578b\uff0c\u5efa\u7acb\u4e00\u500b\u82b1\u6735\u5206\u985e\u9810\u6e2c\u7684 API\u3002\u4f7f\u7528\u8005\u53ef\u4ee5\u900f\u904e POST \u5354\u5b9a\u5f9e\u524d\u7aef\u7db2\u9801\u767c\u9001\u56db\u500b\u6578\u503c\u5206\u5225\u70ba\u82b1\u843c\u7684\u9577\u8207\u5bec\u4ee5\u53ca\u82b1\u74e3\u7684\u9577\u8207\u5bec\u3002\u5f8c\u7aef\u7a0b\u5f0f\u6536\u5230\u6578\u503c\u5f8c\u9001\u7d66\u4e8b\u5148\u6253\u5305\u597d\u7684\u6a21\u578b\uff0c\u4e26\u5c07\u6a21\u578b\u9810\u6e2c\u7d50\u679c\u900f\u904e JSON \u683c\u5f0f\u56de\u50b3\u5230\u524d\u7aef\u4f7f\u7528\u8005\u3002\u4ee5\u4e0b\u70ba\u7a0b\u5f0f\u6574\u500b\u6a39\u72c0\u7d50\u69cb\uff0c\u5176\u4e2d\u5728\u6700\u5916\u5c64\u8cc7\u6599\u593e\u6709\u4e09\u500b\u6a94\u6848\u5206\u5225\u6709\u5c07\u6a21\u578b\u5c01\u88dd\u6210\u51fd\u5f0f\u7684 model.py \u8207 Flask \u4e3b\u7a0b\u5f0f run.py \u4ee5\u53ca\u8ca0\u8cac\u7ba1\u7406\u5c08\u6848\u5957\u4ef6\u7684 requirements.txt \u3002\u53e6\u5916\u5728 model \u8cc7\u6599\u593e\u4e2d\u8ca0\u8cac\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b\u58d3\u7e2e\u6a94\u3002 . 
\u251c\u2500\u2500 model \u2502 \u2514\u2500\u2500 xgboost-iris.pgz \u251c\u2500\u2500 model.py \u251c\u2500\u2500 requirements.txt \u2514\u2500\u2500 run.py \u5c01\u88dd\u9810\u6e2c\u6a21\u578b (model.py) \u9996\u5148\u5efa\u7acb\u4e00\u500b model.py \u6a94\u6848\uff0c\u5728\u9019\u500b\u6a94\u6848\u4e2d\u6211\u5011\u8981\u8f09\u5165\u4e8b\u5148\u8a13\u7df4\u597d\u7684\u6a21\u578b\u4e26\u5c07\u5b83\u5c01\u88dd\u6210\u4e00\u500b function \u6216\u662f class\u3002\u5728\u672c\u7bc4\u4f8b\u7a0b\u5f0f\u4e2d\u6211\u5011\u662f\u5efa\u7acb\u4e00\u500b predict() \u7684\u51fd\u5f0f\u4e26\u4e14\u5141\u8a31\u63a5\u6536\u4e00\u500b Numpy \u7684\u9663\u5217\uff0c\u5176\u4e2d\u88e1\u9762\u5141\u8a31\u593e\u5e36\u56db\u500b\u82b1\u6735\u7279\u5fb5\u7684\u6578\u503c\u3002\u6700\u5f8c\u5c07\u6a21\u578b\u9810\u6e2c\u7d50\u679c\u5b58\u653e\u5728 pred \u8b8a\u6578\u4e2d\uff0c\u4e26\u5c07\u9810\u6e2c\u7684\u985e\u5225\u56de\u50b3\u3002 # -*- coding: UTF-8 -*- import pickle import gzip # \u8f09\u5165\u6a21\u578b with gzip . open ( './model/xgboost-iris.pgz' , 'rb' ) as f : xgboostModel = pickle . load ( f ) # \u5c07\u6a21\u578b\u9810\u6e2c\u5beb\u6210\u4e00\u500b function def predict ( input ): pred = xgboostModel . 
predict ( input )[ 0 ] return pred \u5efa\u7acb Flask API (run.py) \u63a5\u8457\u6211\u5011\u8981\u900f\u904e Flask \u5efa\u7acb\u4e00\u500b API\uff0c\u9996\u5148\u8981\u8a2d\u5b9a\u958b\u653e\u8de8\u7db2\u57df CORS \u6b0a\u9650\u3002\u6240\u8b02\u7684\u8de8\u4f86\u6e90\u8cc7\u6e90\u5171\u4eab (Cross-Origin Resource Sharing, CORS) \u662f\u4e00\u7a2e\u4f7f\u7528\u984d\u5916 HTTP \u6a19\u982d\u4f86\u8b93\u76ee\u524d\u700f\u89bd\u7db2\u7ad9\u7684\u4f7f\u7528\u8005\u80fd\u8a2a\u554f\u4e0d\u540c\u4f86\u6e90\u7db2\u57df\u7684\u4f3a\u670d\u5668\u3002\u7576\u4f7f\u7528\u8005\u8acb\u6c42\u4e00\u500b\u4f86\u81ea\u65bc\u4e0d\u540c\u7db2\u57df\u3001\u901a\u8a0a\u5354\u5b9a\u6216\u901a\u8a0a\u57e0\u7684\u8cc7\u6e90\u6642\uff0c\u6703\u5efa\u7acb\u4e00\u500b\u8de8\u4f86\u6e90 HTTP \u8acb\u6c42\u3002\u6240\u4ee5\u5728\u64b0\u5beb\u7a0b\u5f0f\u7684\u6642\u5019\u5fc5\u9808\u900f\u904e flask_cors \u88e1\u9762\u6240\u63d0\u4f9b\u7684 CORS \u6dfb\u52a0\u8de8\u4f86\u6e90\u8cc7\u6e90\u5171\u4eab\u3002\u9019\u6a23\u524d\u7aef\u4f7f\u7528\u8005\u5728\u4e0d\u540c\u7db2\u57df\u5229\u7528 ajax \u6216 fetch \u5b58\u53d6 API \u6642\u5c31\u6703\u6709\u8b80\u53d6\u6b0a\u9650\u3002 # -*- coding: UTF-8 -*- import numpy as np import model from flask import Flask , request , jsonify from flask_cors import CORS app = Flask ( __name__ ) CORS ( app ) \u63a5\u8457\u6211\u5011\u5148\u793a\u7bc4\u5efa\u7acb\u4e00\u500b GET \u7684\u8def\u7531 @app.route('/') \uff0c\u55ae\u5f15\u865f\u5167\u7684\u5167\u5bb9\u5373\u4ee3\u8868\u4f7f\u7528\u8005\u5728\u547c\u53eb API \u7684\u8def\u5f91\u4f4d\u7f6e / \u4ee3\u8868\u662f root \u7684\u610f\u601d\u3002\u5728\u9019\u4e00\u500b\u6e2c\u8a66\u7684\u8def\u7531\u4e2d\u6211\u5011\u76f4\u63a5\u56de\u50b3\u4e00\u500b hello!! 
\u7684\u5b57\u4e32\u3002\u7a0d\u5f8c\u5c07\u6703\u6559\u5404\u4f4d\u5982\u4f55\u900f\u904e Postman \u9019\u500b\u8edf\u9ad4\u4f86\u6e2c\u8a66 API\u3002\u53e6\u5916\u7b2c\u4e8c\u500b\u8def\u7531\u662f\u8ca0\u8cac\u63a5\u6536\u82b1\u6735\u56db\u500b\u6578\u503c\uff0c\u4e26\u5c07\u9019\u56db\u500b\u6578\u503c\u653e\u5230 Numpy \u9663\u5217\u4e2d\u9001\u5230\u7a0d\u65e9\u4ee5\u5c01\u88dd\u597d\u7684 mpdel.py \u4e2d\u7684 predict() \u65b9\u6cd5\u3002\u4e26\u5c07\u9810\u6e2c\u7684\u7d50\u679c\u900f\u904e JSON \u683c\u5f0f\u56de\u61c9\u7d66\u524d\u7aef\u4f7f\u7528\u8005\u3002\u503c\u5f97\u6ce8\u610f\u7684\u662f\u9019\u4e00\u500b\u8def\u7531\u6211\u5011\u662f\u8a2d\u5b9a\u4ed6\u7684\u8def\u5f91\u70ba /predict \u4ee5\u53ca HTTP Requesst \u7684\u65b9\u6cd5\u6307\u5b9a methods=['POST'] \u3002 @app . route ( '/' ) def index (): return 'hello!!' @app . route ( '/predict' , methods = [ 'POST' ]) def postInput (): # \u53d6\u5f97\u524d\u7aef\u50b3\u904e\u4f86\u7684\u6578\u503c insertValues = request . get_json () x1 = insertValues [ 'sepalLengthCm' ] x2 = insertValues [ 'sepalWidthCm' ] x3 = insertValues [ 'petalLengthCm' ] x4 = insertValues [ 'petalWidthCm' ] input = np . array ([[ x1 , x2 , x3 , x4 ]]) result = model . 
predict ( input ) return jsonify ({ 'return' : str ( result )}) \u6700\u5f8c\u6211\u5011\u900f\u904e app.run() \u5c07\u6b64 API \u90e8\u7f72\u5728\u4f3a\u670d\u5668\u7684 3000 PORT \u7576\u4e2d\u3002 host='0.0.0.0' \u8868\u793a\u9810\u8a2d\u8def\u7531\u5c07\u6703\u81ea\u52d5\u5e6b\u4f60\u53d6\u5f97\u76ee\u524d\u4f3a\u670d\u5668\u7684\u56fa\u5b9a IP \u4f4d\u7f6e\u3002\u7531\u65bc\u6211\u5011\u76ee\u524d\u5728\u672c\u6a5f\u958b\u767c\u7b49\u7b49\u6e2c\u8a66\u6642\u53ef\u4ee5\u76f4\u63a5\u4f7f\u7528 http://localhost:3000 \u9032\u884c\u6e2c\u8a66\u3002\u53e6\u5916\u53c3\u6578 debug \u8a2d\u5b9a\u70ba True \u5373\u8868\u793a API \u88ab\u555f\u52d5\u6642\u6703\u81ea\u52d5\u76e3\u807d\u7a0b\u5f0f\u662f\u5426\u6709\u8b8a\u52d5\uff0c\u5982\u679c\u6709\u66f4\u65b0\u5167\u5bb9\u5373\u6703\u7acb\u523b\u91cd\u65b0\u555f\u52d5 API\u3002\u6b64\u8a2d\u5b9a\u9069\u5408\u5728\u958b\u767c\u6642\u5019\u4f7f\u7528\uff0c\u800c\u771f\u6b63\u4e0a\u7dda\u6642\u518d\u8abf\u6210 False\u3002 if __name__ == '__main__' : app . 
run ( host = '0.0.0.0' , port = 3000 , debug = True ) \u7ba1\u7406\u5957\u4ef6\u7248\u672c (requirements.txt) requirements.txt \u9019\u4e00\u652f\u6a94\u6848\u662f\u8ca0\u8a18\u9304\u4e86\u7576\u524d\u5c08\u6848\u8cc7\u6599\u593e\u4e0b\u7a0b\u5f0f\u6240\u6709\u4f9d\u8cf4\u7684\u5957\u4ef6\u53ca\u76f8\u5c0d\u61c9\u7684\u7248\u672c\u3002\u4e0b\u5217\u4e94\u500b\u662f\u5728\u672c\u5be6\u4f5c\u4e2d\u5c07\u6703\u4f7f\u7528\u5230\u7684\u5957\u4ef6\uff0c\u82e5\u5957\u4ef6\u5f8c\u9762\u6c92\u6709\u7279\u5225\u6307\u5b9a\u7248\u672c\u865f\uff0c\u5b89\u88dd\u6642\u5c07\u6703\u81ea\u52d5\u5b89\u88dd\u6700\u65b0\u7684\u7248\u672c\u3002 Flask Flask-Cors numpy scikit-learn xgboost \u5047\u8a2d\u7a0b\u5f0f\u5728\u53e6\u4e00\u53f0\u96fb\u8166\u4e0a\u57f7\u884c\u6642\uff0c\u8981\u4e00\u500b\u4e00\u500b\u5b89\u88dd\u5957\u4ef6\u5f88\u9ebb\u7169\u3002\u56e0\u6b64\u53ef\u4ee5\u76f4\u63a5\u900f\u904e requirements.txt \u7d00\u9304\u5c08\u6848\u4e2d\u4f9d\u8cf4\u7684\u5957\u4ef6\u3002\u4e26\u4e14\u8f38\u5165\u4ee5\u4e0b\u6307\u4ee4\u5373\u53ef\u4e00\u6b21\u5b89\u88dd\u6240\u6709\u6307\u5b9a\u7684\u5957\u4ef6\u3002 pip install -r requirements.txt \u57f7\u884c API \u5728\u672c\u6a5f\u6216\u958b\u767c\u74b0\u5883\u4e2d\u6e2c\u8a66\u57f7\u884c API \u7684\u65b9\u5f0f\u5f88\u7c21\u55ae\u3002\u53ea\u8981\u958b\u555f\u7d42\u7aef\u6a5f\u4e26\u8f38\u5165\u4ee5\u4e0b\u6307\u4ee4\u5373\u53ef\uff1a python run.py \u7a0b\u5f0f\u771f\u6b63\u4e0a\u7dda\u6642\u5efa\u8b70\u4f7f\u7528 gunicorn \u6216 Waitress \u4f86\u7522\u751f WSGI \u670d\u52d9\uff0c\u4e26\u65bc\u80cc\u666f\u904b\u884c \u9084\u8a18\u5f97\u6211\u5011\u6709\u5beb\u4e00\u500b GET \u65b9\u6cd5\u7684\u6e2c\u8a66\u8def\u7531\u55ce\uff1f\u9019\u6642\u5019\u5927\u5bb6\u53ef\u4ee5\u958b\u555f\u96fb\u8166\u4e2d\u7684\u700f\u89bd\u5668\u4e26\u5728\u7db2\u5740\u5217\u8f38\u5165 http://localhost:3000 \u5373\u53ef\u7acb\u5373\u770b\u5230 API 
\u5728\u6307\u5b9a\u7684\u8def\u5f91\u4e0b\u6240\u56de\u61c9\u7684\u5167\u5bb9\u3002\u5982\u679c\u51fa\u73fe\u4ee5\u4e0b\u756b\u9762\u5373\u4ee3\u8868 API \u5df2\u7d93\u6b63\u5e38\u7684\u88ab\u904b\u884c\u56c9\u3002 \u90a3\u4f60\u53ef\u80fd\u6703\u554f\u6211\u8a72\u600e\u9ebc\u6e2c\u8a66\u53e6\u4e00\u500b POST \u65b9\u6cd5\u5462\uff1f\u7531\u65bc GET \u65b9\u6cd5\u6bd4\u8f03\u597d\u8655\u7406\uff0c\u6211\u5011\u76f4\u63a5\u5728\u700f\u89bd\u5668\u8f38\u5165\u8def\u5f91\u5c31\u80fd\u7acb\u5373\u89c0\u770b\u7d50\u679c\u3002\u90a3\u7576\u6211\u5011\u8981\u6e2c\u8a66 POST\u3001PUT\u3001DELETE \u7b49\u65b9\u6cd5\u6642\u5c31\u5fc5\u9808\u4f9d\u9760\u7b2c\u4e09\u65b9\u8edf\u9ad4 Postman \u4f86\u5354\u52a9\u6a21\u64ec HTTP Request \u5b8c\u6210 API \u6e2c\u8a66\u3002 \u6e2c\u8a66 API \u7684\u597d\u5de5\u5177 Postman \u7576\u4f60\u5beb\u597d\u4e00\u652f API \u6642\u8981\u99ac\u4e0a\u6e2c\u8a66\u770b\u770b\u4f60\u5beb\u7684\u7a0b\u5f0f\u908f\u8f2f\u662f\u5426\u6b63\u78ba\uff0c\u5c31\u53ef\u4ee5\u4f7f\u7528 Postman \u9019\u500b\u8edf\u9ad4\u4f86\u505a API \u6e2c\u8a66\u3002Postman \u4ed6\u662f\u4e00\u500b\u80fd\u5920\u6a21\u64ec HTTP Request \u7684\u5de5\u5177\u80fd\u5920\u8b93\u4f60\u7c21\u55ae\u5feb\u901f\u7684\u6e2c\u8a66\u4f60\u7684 API\uff0c\u4e26\u4e14\u5167\u5efa\u5305\u542b\u8a31\u591a HTTP \u7684\u8acb\u6c42\u65b9\u5f0f\uff0c\u4f8b\u5982\u5e38\u898b\u7684 GET(\u53d6\u5f97)\u3001POST(\u65b0\u589e)\u3001PUT(\u4fee\u6539)\u3001DELETE(\u522a\u9664)\u3002\u9996\u5148\u5927\u5bb6\u53ef\u4ee5\u5230 \u5b98\u7db2 \u4e0b\u8f09\u8207\u5b89\u88dd\u3002 \u5b89\u88dd\u597d\u4e4b\u5f8c\u53ef\u4ee5\u6253\u958b\u7a0b\u5f0f\u4e26\u9ede\u9078 POST \u4e26\u8cbc\u4e0a API \u7db2\u5740\u3002\u7531\u65bc\u6211\u5011\u73fe\u5728\u8981\u6e2c\u8a66\u53e6\u4e00\u500b\u9810\u6e2c\u7684\u8def\u5f91 predict \uff0c\u56e0\u6b64\u5728\u7db2\u5740\u5217\u8cbc\u4e0a http://localhost:3000/predict \u3002\u9ede\u9078 Body-> raw -> JSON \u4e26\u5c07\u82b1\u6735\u7684\u56db\u500b\u53c3\u6578\u4ee5 JSON 
\u683c\u5f0f\u9032\u884c\u63cf\u8ff0\u3002 { \"sepalLengthCm\" : 5.9 , \"sepalWidthCm\" : 3 , \"petalLengthCm\" : 5.1 , \"petalWidthCm\" : 1.8 } \u9ede\u9078 send \u5f8c\u5373\u53ef\u5c07\u6a21\u64ec\u7684\u56db\u500b\u6578\u503c\u900f\u904e JSON \u683c\u5f0f\u4f7f\u7528 POST \u65b9\u6cd5\u50b3\u9001\u5230\u5f8c\u7aef API \u4e2d\u7684 predict \u8def\u5f91\u3002\u8a72 API \u900f\u904e POST \u63a5\u6536\u5230\u524d\u7aef\u4f7f\u7528\u8005\u6240\u767c\u9001\u7684\u8a0a\u606f\u5f8c\uff0c\u89e3\u6790\u9019\u56db\u500b\u6578\u503c\u4e26\u4f9d\u5e8f\u653e\u5728\u9663\u5217\u4e2d\u4e26\u9032\u884c\u6a21\u578b\u9810\u6e2c\u3002\u6700\u7d42\u9810\u6e2c\u7d50\u679c\u6703\u5c07\u82b1\u7684\u7a2e\u985e\u4ee5 JSON \u683c\u5f0f\u56de\u50b3\u5230\u524d\u7aef\u4f7f\u7528\u8005\u3002\u6b64\u6642\u524d\u7aef\u7684\u7db2\u9801\u8a2d\u8a08\u5e2b\u5c31\u53ef\u4ee5\u5c07\u62ff\u5230\u7684\u9810\u6e2c\u7d50\u679c\u9032\u884c\u524d\u7aef\u7684\u756b\u9762\u6e32\u67d3\u8207\u66f4\u65b0\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 29] \u4f7f\u7528 Python Flask \u67b6\u8a2d API \u5427\uff01"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#day-29-python-flask-api","text":"","title":"[Day 29] \u4f7f\u7528 Python Flask \u67b6\u8a2d API \u5427\uff01"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#_1","text":"API \u89c0\u5ff5\u8b1b\u89e3 \u4ec0\u9ebc\u662f API\uff1f RESTful API HTTP Request \u65b9\u6cd5 \u624b\u628a\u624b\u5be6\u4f5c\u4e00\u500b\u82b1\u6735\u5206\u985e\u5668 API \u900f\u904e Python Flask \u5efa\u7f6e\u4e00\u500b\u5f8c\u7aef\u9810\u6e2c\u6a21\u578b API \u7bc4\u4f8b\u7a0b\u5f0f\uff1a 
Code","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#_2","text":"\u7576\u6a21\u578b\u8a13\u7df4\u5b8c\u4ee5\u5f8c\u4e0b\u4e00\u500b\u6b65\u9a5f\u5c31\u662f\u61c9\u7528\u8207\u843d\u5730\u3002\u6211\u5011\u53ef\u4ee5\u8a2d\u8a08\u4e00\u500b\u5d4c\u5165\u5f0f\u7cfb\u7d71\u8207\u4f7f\u7528\u8005\u4e92\u52d5\uff0c\u4f8b\u5982\u6a39\u8393\u6d3e\u3001Jetson Nano\u3001NeuroPilot...\u7b49\u786c\u9ad4\u4f86\u5354\u52a9 AI \u6a21\u578b\u7684\u908a\u7de3\u904b\u7b97\u3002\u6216\u662f\u8a2d\u8a08\u4e00\u500b\u624b\u6a5f APP \u4ee5\u53ca\u7db2\u9801\u61c9\u7528\u3002\u5f88\u591a\u4eba\u53ef\u80fd\u6703\u6709\u7591\u554f\u6a21\u578b\u8a13\u7df4\u597d\u7136\u5f8c\u4e0b\u4e00\u500b\u6b65\u9a5f\u8a72\u600e\u505a\uff1f\u6700\u5e38\u898b\u7684\u505a\u6cd5\u5c31\u662f\u5c07\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5132\u5b58\u8d77\u4f86\u4e26\u5efa\u7acb\u4e00\u500b API \u90e8\u7f72\u5728\u5f8c\u7aef\u4f3a\u670d\u5668\u4e2d\uff0c\u63a5\u8457\u4efb\u4f55\u7684\u7d42\u7aef\u8a2d\u5099\u90fd\u53ef\u4ee5\u900f\u904e\u9019\u4e00\u500b API \u9032\u884c\u8cc7\u6599\u7684\u5b58\u53d6\u8207\u6a21\u578b\u9810\u6e2c\u3002\u4e0b\u5716\u662f\u4e00\u500b\u7c21\u55ae\u7684\u6a21\u578b\u843d\u5730\u7684\u61c9\u7528\u60c5\u5883\uff0c\u6211\u5011\u53ef\u4ee5\u5728\u5f8c\u7aef\u4f3a\u670d\u5668\u90e8\u7f72\u6a21\u578b\u4e26\u5efa\u7acb\u4e00\u500b API \u7684\u63a5\u53e3\u8207\u524d\u7aef\u4f7f\u7528\u8005\u4e92\u52d5\u3002\u524d\u7aef\u7db2\u9801\u7684\u4f7f\u7528\u8005\u900f\u904e HTTP \u7684\u5354\u5b9a\u8207\u5f8c\u7aef\u4f3a\u670d\u5668\u9032\u884c\u901a\u8a0a\u8207\u8cc7\u6599\u4ea4\u63db\uff0c\u6700\u7d42\u6a21\u578b\u7684\u9810\u6e2c\u7d50\u679c\u6703\u56de\u50b3\u5230\u524d\u7aef\u4f7f\u7528\u8005\u4e26\u5c07\u7d50\u679c\u9078\u67d3\u5728\u7db2\u9801\u4e0a\u3002\u6211\u5011\u5ef6\u7e8c\u6628\u5929\u7684\u5167\u5bb9 [Day 28] \u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b 
\uff0c\u76ee\u524d\u5df2\u7d93\u6210\u529f\u7684\u8f38\u51fa\u6a21\u578b\u3002\u4eca\u5929\u5c31\u4f86\u6559\u5404\u4f4d\u5982\u4f55\u900f\u904e Python Flask \u67b6\u8a2d\u4e00\u500b\u9cf6\u5c3e\u82b1\u6735\u5206\u985e\u5668\u7684 API \u5427\uff01","title":"\u524d\u8a00"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#api","text":"\u6240\u8b02\u7684 API \u4e2d\u6587\u5168\u540d\u70ba\u61c9\u7528\u7a0b\u5f0f\u4ecb\u9762 (Application Programming Interface) \u662f\u5c6c\u65bc\u5ba2\u6236\u7aef\u8207\u4f3a\u670d\u7aef\u7684\u6e9d\u901a\u6a4b\u6a11\uff0c\u5b83\u63d0\u4f9b\u4e00\u500b\u7aef\u53e3\u80fd\u5920\u9032\u884c\u8cc7\u6599\u4ea4\u63db\u3002\u7c21\u55ae\u4f86\u8aaa\u662f\u4e00\u500b\u524d\u7aef\u8207\u5f8c\u7aef\u7684\u4e00\u500b\u6e9d\u901a\u4ecb\u9762\u3002 \u53e6\u5916\u5927\u5bb6\u53ef\u80fd\u807d\u904e\u4e00\u500b\u540d\u8a5e\u53eb\u505a RESTful API\u3002\u6240\u8b02\u7684 REST \u70ba Representational State Transfer \u7684\u7e2e\u5beb\u662f\u4e00\u7a2e\u7db2\u8def\u67b6\u69cb\u98a8\u683c\uff0c\u8fd1\u5e7e\u5e74\u4f86 REST \u7684\u6982\u5ff5\u5df2\u7d93\u88ab\u5be6\u4f5c\u5728\u5927\u578b\u7db2\u8def\u7cfb\u7d71\u4e2d\uff0c\u800c\u5728 Web \u670d\u52d9\u4e2d\u4f7f\u7528 REST \u6982\u5ff5\u88ab\u5be6\u4f5c\u51fa\u4f86\u7684 API \u5c31\u7c21\u7a31\u70ba RESTful API \u4ed6\u662f\u4f7f\u7528 HTTP \u7684\u5354\u5b9a\u5b8c\u6574\u5b9a\u7fa9 Web \u670d\u52d9\u5728 HTTP Request \u7684\u5404\u7a2e\u6d41\u7a0b\u3002","title":"\u4ec0\u9ebc\u662f API\uff1f"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#http-request","text":"\u900f\u904e\u7db2\u8def\u5354\u5b9a HTTP Request \u4e0d\u540c\u7684\u65b9\u6cd5\uff0c\u53ef\u4ee5\u5be6\u73fe\u4e0d\u540c\u7684\u8cc7\u6599\u4ea4\u63db\u8acb\u6c42\u65b9\u5f0f\u3002HTTP \u672c\u8eab\u5c31\u662f REST \u7684\u5be6\u4f5c\uff0c\u6240\u8b02\u7684 HTTP Request \u5b9a\u7fa9\u4e86\u516b\u7a2e\u8acb\u6c42\u65b9\u6cd5\u5206\u5225\u70ba\uff1a GET 
\uff1a\u6b64\u65b9\u6cd5\u53ea\u80fd\u5411\u6307\u5b9a\u7684\u8cc7\u6e90\u8981\u6c42\u53d6\u5f97\u8cc7\u6599\uff0c\u4e26\u4e0d\u6703\u66f4\u52d5\u5230\u5167\u90e8\u8cc7\u6e90\u3002 HEAD\uff1aHEAD \u8ddf GET \u65b9\u6cd5\u985e\u4f3c\u53ea\u5dee\u5225\u5728\u5b83\u4e26\u4e0d\u6703\u56de\u50b3\u4f60\u6240\u8acb\u6c42\u7684\u8cc7\u6e90\u5728 body \u4e0a\uff0c\u53ea\u56de\u50b3 HTTP header\u3002 POST \uff1a\u5411\u6307\u5b9a\u7684\u8cc7\u6e90\u63d0\u4ea4\u8cc7\u6599\u3002 PUT \uff1a\u5411\u6307\u5b9a\u8cc7\u6e90\u4f4d\u7f6e\u63d0\u4ea4\u66f4\u65b0\u5167\u5bb9\u3002 DELETE \uff1a\u5411\u6307\u5b9a\u8cc7\u6e90\u4f4d\u7f6e\u8acb\u6c42\u522a\u9664\u5167\u5bb9\u3002 CONNECT\uff1aHTTP/1.1\u5354\u8b70\u4e2d\u9810\u7559\u7d66\u80fd\u5920\u5c07\u9023\u63a5\u6539\u70ba\u7ba1\u9053\u65b9\u5f0f\u7684\u4ee3\u7406\u670d\u52d9\u5668\u3002 OPTIONS\uff1a\u6b64\u65b9\u6cd5\u53ef\u4f7f\u670d\u52d9\u5668\u50b3\u56de\u8a72\u8cc7\u6e90\u6240\u652f\u6301\u7684\u6240\u6709 HTTP \u8acb\u6c42\u65b9\u6cd5\u3002 TRACE\uff1a\u56de\u986f\u670d\u52d9\u5668\u6536\u5230\u7684\u8acb\u6c42\uff0c\u4e3b\u8981\u7528\u65bc\u6e2c\u8a66\u6216\u8a3a\u65b7\u3002","title":"HTTP Request \u65b9\u6cd5"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#api_1","text":"","title":"[\u7a0b\u5f0f\u5be6\u4f5c] \u9cf6\u5c3e\u82b1\u6735\u5206\u985e\u5668 API"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#python-flask-api","text":"Flask \u662f\u4e00\u500b\u4f7f\u7528 Python \u8a9e\u8a00\u7de8\u5beb\u7684\u8f15\u91cf\u7d1a Web \u61c9\u7528\u6846\u67b6\u3002\u5728\u4eca\u65e5\u7684\u6587\u7ae0\u4e2d\u6211\u5011\u5c07\u5ef6\u7e8c\u6628\u5929\u6240\u5132\u5b58\u7684\u9cf6\u5c3e\u82b1\u6735\u5206\u985e\u5668\u6a21\u578b\uff0c\u5efa\u7acb\u4e00\u500b\u82b1\u6735\u5206\u985e\u9810\u6e2c\u7684 API\u3002\u4f7f\u7528\u8005\u53ef\u4ee5\u900f\u904e POST 
\u5354\u5b9a\u5f9e\u524d\u7aef\u7db2\u9801\u767c\u9001\u56db\u500b\u6578\u503c\u5206\u5225\u70ba\u82b1\u843c\u7684\u9577\u8207\u5bec\u4ee5\u53ca\u82b1\u74e3\u7684\u9577\u8207\u5bec\u3002\u5f8c\u7aef\u7a0b\u5f0f\u6536\u5230\u6578\u503c\u5f8c\u9001\u7d66\u4e8b\u5148\u6253\u5305\u597d\u7684\u6a21\u578b\uff0c\u4e26\u5c07\u6a21\u578b\u9810\u6e2c\u7d50\u679c\u900f\u904e JSON \u683c\u5f0f\u56de\u50b3\u5230\u524d\u7aef\u4f7f\u7528\u8005\u3002\u4ee5\u4e0b\u70ba\u7a0b\u5f0f\u6574\u500b\u6a39\u72c0\u7d50\u69cb\uff0c\u5176\u4e2d\u5728\u6700\u5916\u5c64\u8cc7\u6599\u593e\u6709\u4e09\u500b\u6a94\u6848\u5206\u5225\u6709\u5c07\u6a21\u578b\u5c01\u88dd\u6210\u51fd\u5f0f\u7684 model.py \u8207 Flask \u4e3b\u7a0b\u5f0f run.py \u4ee5\u53ca\u8ca0\u8cac\u7ba1\u7406\u5c08\u6848\u5957\u4ef6\u7684 requirements.txt \u3002\u53e6\u5916\u5728 model \u8cc7\u6599\u593e\u4e2d\u8ca0\u8cac\u5132\u5b58\u8a13\u7df4\u597d\u7684\u6a21\u578b\u58d3\u7e2e\u6a94\u3002 . \u251c\u2500\u2500 model \u2502 \u2514\u2500\u2500 xgboost-iris.pgz \u251c\u2500\u2500 model.py \u251c\u2500\u2500 requirements.txt \u2514\u2500\u2500 run.py","title":"\u5efa\u7acb Python Flask API"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#modelpy","text":"\u9996\u5148\u5efa\u7acb\u4e00\u500b model.py \u6a94\u6848\uff0c\u5728\u9019\u500b\u6a94\u6848\u4e2d\u6211\u5011\u8981\u8f09\u5165\u4e8b\u5148\u8a13\u7df4\u597d\u7684\u6a21\u578b\u4e26\u5c07\u5b83\u5c01\u88dd\u6210\u4e00\u500b function \u6216\u662f class\u3002\u5728\u672c\u7bc4\u4f8b\u7a0b\u5f0f\u4e2d\u6211\u5011\u662f\u5efa\u7acb\u4e00\u500b predict() \u7684\u51fd\u5f0f\u4e26\u4e14\u5141\u8a31\u63a5\u6536\u4e00\u500b Numpy \u7684\u9663\u5217\uff0c\u5176\u4e2d\u88e1\u9762\u5141\u8a31\u593e\u5e36\u56db\u500b\u82b1\u6735\u7279\u5fb5\u7684\u6578\u503c\u3002\u6700\u5f8c\u5c07\u6a21\u578b\u9810\u6e2c\u7d50\u679c\u5b58\u653e\u5728 pred \u8b8a\u6578\u4e2d\uff0c\u4e26\u5c07\u9810\u6e2c\u7684\u985e\u5225\u56de\u50b3\u3002 # -*- coding: UTF-8 -*- import pickle import gzip # 
\u8f09\u5165\u6a21\u578b with gzip . open ( './model/xgboost-iris.pgz' , 'rb' ) as f : xgboostModel = pickle . load ( f ) # \u5c07\u6a21\u578b\u9810\u6e2c\u5beb\u6210\u4e00\u500b function def predict ( input ): pred = xgboostModel . predict ( input )[ 0 ] return pred","title":"\u5c01\u88dd\u9810\u6e2c\u6a21\u578b (model.py)"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#flask-api-runpy","text":"\u63a5\u8457\u6211\u5011\u8981\u900f\u904e Flask \u5efa\u7acb\u4e00\u500b API\uff0c\u9996\u5148\u8981\u8a2d\u5b9a\u958b\u653e\u8de8\u7db2\u57df CORS \u6b0a\u9650\u3002\u6240\u8b02\u7684\u8de8\u4f86\u6e90\u8cc7\u6e90\u5171\u4eab (Cross-Origin Resource Sharing, CORS) \u662f\u4e00\u7a2e\u4f7f\u7528\u984d\u5916 HTTP \u6a19\u982d\u4f86\u8b93\u76ee\u524d\u700f\u89bd\u7db2\u7ad9\u7684\u4f7f\u7528\u8005\u80fd\u8a2a\u554f\u4e0d\u540c\u4f86\u6e90\u7db2\u57df\u7684\u4f3a\u670d\u5668\u3002\u7576\u4f7f\u7528\u8005\u8acb\u6c42\u4e00\u500b\u4f86\u81ea\u65bc\u4e0d\u540c\u7db2\u57df\u3001\u901a\u8a0a\u5354\u5b9a\u6216\u901a\u8a0a\u57e0\u7684\u8cc7\u6e90\u6642\uff0c\u6703\u5efa\u7acb\u4e00\u500b\u8de8\u4f86\u6e90 HTTP \u8acb\u6c42\u3002\u6240\u4ee5\u5728\u64b0\u5beb\u7a0b\u5f0f\u7684\u6642\u5019\u5fc5\u9808\u900f\u904e flask_cors \u88e1\u9762\u6240\u63d0\u4f9b\u7684 CORS \u6dfb\u52a0\u8de8\u4f86\u6e90\u8cc7\u6e90\u5171\u4eab\u3002\u9019\u6a23\u524d\u7aef\u4f7f\u7528\u8005\u5728\u4e0d\u540c\u7db2\u57df\u5229\u7528 ajax \u6216 fetch \u5b58\u53d6 API \u6642\u5c31\u6703\u6709\u8b80\u53d6\u6b0a\u9650\u3002 # -*- coding: UTF-8 -*- import numpy as np import model from flask import Flask , request , jsonify from flask_cors import CORS app = Flask ( __name__ ) CORS ( app ) \u63a5\u8457\u6211\u5011\u5148\u793a\u7bc4\u5efa\u7acb\u4e00\u500b GET \u7684\u8def\u7531 @app.route('/') \uff0c\u55ae\u5f15\u865f\u5167\u7684\u5167\u5bb9\u5373\u4ee3\u8868\u4f7f\u7528\u8005\u5728\u547c\u53eb API \u7684\u8def\u5f91\u4f4d\u7f6e / \u4ee3\u8868\u662f root 
\u7684\u610f\u601d\u3002\u5728\u9019\u4e00\u500b\u6e2c\u8a66\u7684\u8def\u7531\u4e2d\u6211\u5011\u76f4\u63a5\u56de\u50b3\u4e00\u500b hello!! \u7684\u5b57\u4e32\u3002\u7a0d\u5f8c\u5c07\u6703\u6559\u5404\u4f4d\u5982\u4f55\u900f\u904e Postman \u9019\u500b\u8edf\u9ad4\u4f86\u6e2c\u8a66 API\u3002\u53e6\u5916\u7b2c\u4e8c\u500b\u8def\u7531\u662f\u8ca0\u8cac\u63a5\u6536\u82b1\u6735\u56db\u500b\u6578\u503c\uff0c\u4e26\u5c07\u9019\u56db\u500b\u6578\u503c\u653e\u5230 Numpy \u9663\u5217\u4e2d\u9001\u5230\u7a0d\u65e9\u5df2\u5c01\u88dd\u597d\u7684 model.py \u4e2d\u7684 predict() \u65b9\u6cd5\u3002\u4e26\u5c07\u9810\u6e2c\u7684\u7d50\u679c\u900f\u904e JSON \u683c\u5f0f\u56de\u61c9\u7d66\u524d\u7aef\u4f7f\u7528\u8005\u3002\u503c\u5f97\u6ce8\u610f\u7684\u662f\u9019\u4e00\u500b\u8def\u7531\u6211\u5011\u662f\u8a2d\u5b9a\u4ed6\u7684\u8def\u5f91\u70ba /predict \u4ee5\u53ca HTTP Request \u7684\u65b9\u6cd5\u6307\u5b9a methods=['POST'] \u3002 @app . route ( '/' ) def index (): return 'hello!!' @app . route ( '/predict' , methods = [ 'POST' ]) def postInput (): # \u53d6\u5f97\u524d\u7aef\u50b3\u904e\u4f86\u7684\u6578\u503c insertValues = request . get_json () x1 = insertValues [ 'sepalLengthCm' ] x2 = insertValues [ 'sepalWidthCm' ] x3 = insertValues [ 'petalLengthCm' ] x4 = insertValues [ 'petalWidthCm' ] input = np . array ([[ x1 , x2 , x3 , x4 ]]) result = model . 
predict ( input ) return jsonify ({ 'return' : str ( result )}) \u6700\u5f8c\u6211\u5011\u900f\u904e app.run() \u5c07\u6b64 API \u90e8\u7f72\u5728\u4f3a\u670d\u5668\u7684 3000 PORT \u7576\u4e2d\u3002 host='0.0.0.0' \u8868\u793a\u9810\u8a2d\u8def\u7531\u5c07\u6703\u81ea\u52d5\u5e6b\u4f60\u53d6\u5f97\u76ee\u524d\u4f3a\u670d\u5668\u7684\u56fa\u5b9a IP \u4f4d\u7f6e\u3002\u7531\u65bc\u6211\u5011\u76ee\u524d\u5728\u672c\u6a5f\u958b\u767c\u7b49\u7b49\u6e2c\u8a66\u6642\u53ef\u4ee5\u76f4\u63a5\u4f7f\u7528 http://localhost:3000 \u9032\u884c\u6e2c\u8a66\u3002\u53e6\u5916\u53c3\u6578 debug \u8a2d\u5b9a\u70ba True \u5373\u8868\u793a API \u88ab\u555f\u52d5\u6642\u6703\u81ea\u52d5\u76e3\u807d\u7a0b\u5f0f\u662f\u5426\u6709\u8b8a\u52d5\uff0c\u5982\u679c\u6709\u66f4\u65b0\u5167\u5bb9\u5373\u6703\u7acb\u523b\u91cd\u65b0\u555f\u52d5 API\u3002\u6b64\u8a2d\u5b9a\u9069\u5408\u5728\u958b\u767c\u6642\u5019\u4f7f\u7528\uff0c\u800c\u771f\u6b63\u4e0a\u7dda\u6642\u518d\u8abf\u6210 False\u3002 if __name__ == '__main__' : app . 
run ( host = '0.0.0.0' , port = 3000 , debug = True )","title":"\u5efa\u7acb Flask API (run.py)"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#requirementstxt","text":"requirements.txt \u9019\u4e00\u652f\u6a94\u6848\u662f\u8ca0\u8cac\u8a18\u9304\u4e86\u7576\u524d\u5c08\u6848\u8cc7\u6599\u593e\u4e0b\u7a0b\u5f0f\u6240\u6709\u4f9d\u8cf4\u7684\u5957\u4ef6\u53ca\u76f8\u5c0d\u61c9\u7684\u7248\u672c\u3002\u4e0b\u5217\u4e94\u500b\u662f\u5728\u672c\u5be6\u4f5c\u4e2d\u5c07\u6703\u4f7f\u7528\u5230\u7684\u5957\u4ef6\uff0c\u82e5\u5957\u4ef6\u5f8c\u9762\u6c92\u6709\u7279\u5225\u6307\u5b9a\u7248\u672c\u865f\uff0c\u5b89\u88dd\u6642\u5c07\u6703\u81ea\u52d5\u5b89\u88dd\u6700\u65b0\u7684\u7248\u672c\u3002 Flask Flask-Cors numpy scikit-learn xgboost \u5047\u8a2d\u7a0b\u5f0f\u5728\u53e6\u4e00\u53f0\u96fb\u8166\u4e0a\u57f7\u884c\u6642\uff0c\u8981\u4e00\u500b\u4e00\u500b\u5b89\u88dd\u5957\u4ef6\u5f88\u9ebb\u7169\u3002\u56e0\u6b64\u53ef\u4ee5\u76f4\u63a5\u900f\u904e requirements.txt \u7d00\u9304\u5c08\u6848\u4e2d\u4f9d\u8cf4\u7684\u5957\u4ef6\u3002\u4e26\u4e14\u8f38\u5165\u4ee5\u4e0b\u6307\u4ee4\u5373\u53ef\u4e00\u6b21\u5b89\u88dd\u6240\u6709\u6307\u5b9a\u7684\u5957\u4ef6\u3002 pip install -r requirements.txt","title":"\u7ba1\u7406\u5957\u4ef6\u7248\u672c (requirements.txt)"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#api_2","text":"\u5728\u672c\u6a5f\u6216\u958b\u767c\u74b0\u5883\u4e2d\u6e2c\u8a66\u57f7\u884c API \u7684\u65b9\u5f0f\u5f88\u7c21\u55ae\u3002\u53ea\u8981\u958b\u555f\u7d42\u7aef\u6a5f\u4e26\u8f38\u5165\u4ee5\u4e0b\u6307\u4ee4\u5373\u53ef\uff1a python run.py \u7a0b\u5f0f\u771f\u6b63\u4e0a\u7dda\u6642\u5efa\u8b70\u4f7f\u7528 gunicorn \u6216 Waitress \u4f86\u7522\u751f WSGI \u670d\u52d9\uff0c\u4e26\u65bc\u80cc\u666f\u904b\u884c\u3002 \u9084\u8a18\u5f97\u6211\u5011\u6709\u5beb\u4e00\u500b GET 
\u65b9\u6cd5\u7684\u6e2c\u8a66\u8def\u7531\u55ce\uff1f\u9019\u6642\u5019\u5927\u5bb6\u53ef\u4ee5\u958b\u555f\u96fb\u8166\u4e2d\u7684\u700f\u89bd\u5668\u4e26\u5728\u7db2\u5740\u5217\u8f38\u5165 http://localhost:3000 \u5373\u53ef\u7acb\u5373\u770b\u5230 API \u5728\u6307\u5b9a\u7684\u8def\u5f91\u4e0b\u6240\u56de\u61c9\u7684\u5167\u5bb9\u3002\u5982\u679c\u51fa\u73fe\u4ee5\u4e0b\u756b\u9762\u5373\u4ee3\u8868 API \u5df2\u7d93\u6b63\u5e38\u7684\u88ab\u904b\u884c\u56c9\u3002 \u90a3\u4f60\u53ef\u80fd\u6703\u554f\u6211\u8a72\u600e\u9ebc\u6e2c\u8a66\u53e6\u4e00\u500b POST \u65b9\u6cd5\u5462\uff1f\u7531\u65bc GET \u65b9\u6cd5\u6bd4\u8f03\u597d\u8655\u7406\uff0c\u6211\u5011\u76f4\u63a5\u5728\u700f\u89bd\u5668\u8f38\u5165\u8def\u5f91\u5c31\u80fd\u7acb\u5373\u89c0\u770b\u7d50\u679c\u3002\u90a3\u7576\u6211\u5011\u8981\u6e2c\u8a66 POST\u3001PUT\u3001DELETE \u7b49\u65b9\u6cd5\u6642\u5c31\u5fc5\u9808\u4f9d\u9760\u7b2c\u4e09\u65b9\u8edf\u9ad4 Postman \u4f86\u5354\u52a9\u6a21\u64ec HTTP Request \u5b8c\u6210 API \u6e2c\u8a66\u3002","title":"\u57f7\u884c API"},{"location":"29.\u4f7f\u7528Python-Flask\u67b6\u8a2dAPI\u5427/#api-postman","text":"\u7576\u4f60\u5beb\u597d\u4e00\u652f API \u6642\u8981\u99ac\u4e0a\u6e2c\u8a66\u770b\u770b\u4f60\u5beb\u7684\u7a0b\u5f0f\u908f\u8f2f\u662f\u5426\u6b63\u78ba\uff0c\u5c31\u53ef\u4ee5\u4f7f\u7528 Postman \u9019\u500b\u8edf\u9ad4\u4f86\u505a API \u6e2c\u8a66\u3002Postman \u4ed6\u662f\u4e00\u500b\u80fd\u5920\u6a21\u64ec HTTP Request \u7684\u5de5\u5177\u80fd\u5920\u8b93\u4f60\u7c21\u55ae\u5feb\u901f\u7684\u6e2c\u8a66\u4f60\u7684 API\uff0c\u4e26\u4e14\u5167\u5efa\u5305\u542b\u8a31\u591a HTTP \u7684\u8acb\u6c42\u65b9\u5f0f\uff0c\u4f8b\u5982\u5e38\u898b\u7684 GET(\u53d6\u5f97)\u3001POST(\u65b0\u589e)\u3001PUT(\u4fee\u6539)\u3001DELETE(\u522a\u9664)\u3002\u9996\u5148\u5927\u5bb6\u53ef\u4ee5\u5230 \u5b98\u7db2 \u4e0b\u8f09\u8207\u5b89\u88dd\u3002 \u5b89\u88dd\u597d\u4e4b\u5f8c\u53ef\u4ee5\u6253\u958b\u7a0b\u5f0f\u4e26\u9ede\u9078 POST \u4e26\u8cbc\u4e0a API 
\u7db2\u5740\u3002\u7531\u65bc\u6211\u5011\u73fe\u5728\u8981\u6e2c\u8a66\u53e6\u4e00\u500b\u9810\u6e2c\u7684\u8def\u5f91 predict \uff0c\u56e0\u6b64\u5728\u7db2\u5740\u5217\u8cbc\u4e0a http://localhost:3000/predict \u3002\u9ede\u9078 Body-> raw -> JSON \u4e26\u5c07\u82b1\u6735\u7684\u56db\u500b\u53c3\u6578\u4ee5 JSON \u683c\u5f0f\u9032\u884c\u63cf\u8ff0\u3002 { \"sepalLengthCm\" : 5.9 , \"sepalWidthCm\" : 3 , \"petalLengthCm\" : 5.1 , \"petalWidthCm\" : 1.8 } \u9ede\u9078 send \u5f8c\u5373\u53ef\u5c07\u6a21\u64ec\u7684\u56db\u500b\u6578\u503c\u900f\u904e JSON \u683c\u5f0f\u4f7f\u7528 POST \u65b9\u6cd5\u50b3\u9001\u5230\u5f8c\u7aef API \u4e2d\u7684 predict \u8def\u5f91\u3002\u8a72 API \u900f\u904e POST \u63a5\u6536\u5230\u524d\u7aef\u4f7f\u7528\u8005\u6240\u767c\u9001\u7684\u8a0a\u606f\u5f8c\uff0c\u89e3\u6790\u9019\u56db\u500b\u6578\u503c\u4e26\u4f9d\u5e8f\u653e\u5728\u9663\u5217\u4e2d\u4e26\u9032\u884c\u6a21\u578b\u9810\u6e2c\u3002\u6700\u7d42\u9810\u6e2c\u7d50\u679c\u6703\u5c07\u82b1\u7684\u7a2e\u985e\u4ee5 JSON \u683c\u5f0f\u56de\u50b3\u5230\u524d\u7aef\u4f7f\u7528\u8005\u3002\u6b64\u6642\u524d\u7aef\u7684\u7db2\u9801\u8a2d\u8a08\u5e2b\u5c31\u53ef\u4ee5\u5c07\u62ff\u5230\u7684\u9810\u6e2c\u7d50\u679c\u9032\u884c\u524d\u7aef\u7684\u756b\u9762\u6e32\u67d3\u8207\u66f4\u65b0\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u6e2c\u8a66 API \u7684\u597d\u5de5\u5177 Postman"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/","text":"[Day 3] \u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce?\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427! \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u63a2\u7d22\u5f0f\u5206\u6790 (EDA) \u804a\u804a\u4f55\u8b02 EDA\uff0c\u70ba\u4f55\u8981\u505a\u6578\u64da\u5206\u6790? 
\u64b0\u5beb\u7b2c\u4e00\u652f EDA \u7a0b\u5f0f \u900f\u904e\u9cf6\u5c3e\u82b1 (iris) \u8cc7\u6599\u96c6\uff0c\u4f86\u67e5\u770b\u8cc7\u6599\u7684\u5206\u4f48\u72c0\u614b \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u63a2\u7d22\u5f0f\u5206\u6790 (EDA) \u63a2\u7d22\u5f0f\u8cc7\u6599\u5206\u6790 (Exploratory Data Analysis, EDA)\uff0c\u4e3b\u8981\u6982\u5ff5\u662f\u5229\u7528\u6578\u64da\u7d71\u8a08\u7684\u65b9\u5f0f\u8996\u89ba\u5316\u8cc7\u6599\u3002\u900f\u904e\u8cc7\u6599\u7684\u63a2\u7d22\u5f0f\u5206\u6790\u53ef\u4ee5\u67e5\u770b\u8cc7\u6599\u96c6\u7576\u4e2d\u6bcf\u500b\u7279\u5fb5\u5f7c\u6b64\u7684\u91cd\u8981\u7a0b\u5ea6\u4ee5\u53ca\u5176\u8cc7\u6599\u5206\u5e03\u72c0\u6cc1\uff0c\u6709\u826f\u597d\u7684\u6578\u64da\u5206\u6790\u7fd2\u6163\u80fd\u5920\u5e6b\u52a9\u4f60\u66f4\u4e86\u89e3\u8cc7\u6599\u96c6\u7684\u7279\u6027\u3002\u53e6\u5916\u505a EDA \u7684\u597d\u8655\u662f\u53ef\u4ee5\u5f9e\u5404\u7a2e\u9762\u5411\u5148\u4e86\u89e3\u8cc7\u6599\u7684\u72c0\u6cc1\uff0c\u4ee5\u5229\u5f8c\u7e8c\u7684\u6a21\u578b\u5206\u6790\u3002 EDA \u5fc5\u8981\u7684\u5957\u4ef6 \u8cc7\u6599\u8655\u7406 \u2013 Pandas, Numpy Pandas \uff1aPython \u8868\u683c\u8cc7\u6599\u8655\u7406\u7684\u91cd\u8981\u5de5\u5177 Numpy \uff1a\u91dd\u5c0d\u591a\u7dad\u9663\u5217\u7684\u5e73\u884c\u904b\u7b97\u9032\u884c\u512a\u5316\u7684\u5f37\u5927\u51fd\u5f0f\u5eab \u7e6a\u5716\u76f8\u95dc \u2013 Matplotlib, Seaborn Matplotlib \uff1aPython \u6700\u5e38\u88ab\u4f7f\u7528\u5230\u7684\u7e6a\u5716\u5957\u4ef6 Seaborn \uff1a\u4ee5 matplotlib \u70ba\u5e95\u5c64\u7684\u9ad8\u968e\u7e6a\u5716\u5957\u4ef6 \u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u4e00\u89bd \u6b64\u8cc7\u6599\u96c6\u7e3d\u5171\u67094\u500b\u8f38\u5165\u7279\u5fb5\u3002\u5206\u5225\u70ba\u82b1\u843c\u9577\u5ea6\u3001\u82b1\u843c\u5bec\u5ea6\u3001\u82b1\u74e3\u9577\u5ea6\u8207\u82b1\u74e3\u5bec\u5ea6\u3002\u8f38\u51fa\u7279\u5fb5\u70ba\u82b1\u6735\u7684\u54c1\u7a2e\uff0c\u5171\u6709\u4e09\u7a2e\u985e\u5225\u5206\u5225\u70ba 0: iris setosa\u3001 1: iris 
versicolor\u3001 2: iris virginica\u3002 \u8f09\u5165\u5fc5\u8981\u5957\u4ef6 \u9996\u5148\u6211\u5011\u8f09\u5165\u8cc7\u6599\u63a2\u7d22\u5f0f\u5206\u6790\u6240\u9700\u7684\u5957\u4ef6\u3002\u5206\u5225\u6709\u9032\u884c\u6578\u64da\u8655\u7406\u7684\u51fd\u5f0f\u5eab\u7684 pandas \u3001\u9ad8\u968e\u5927\u91cf\u7684\u7dad\u5ea6\u9663\u5217\u8207\u77e9\u9663\u904b\u7b97\u7684 numpy \u3001\u8655\u7406\u8cc7\u6599\u8996\u89ba\u5316\u7684\u7e6a\u5716\u5eab matplotlib \u8207 seaborn \u3002\u6700\u5f8c\u4e00\u500b\u662f\u8cc7\u6599\u96c6\u4f86\u6e90\uff0c\u6b64\u7cfb\u5217\u7bc4\u4f8b\u6211\u5011\u63a1\u7528 Sklearn \u6240\u63d0\u4f9b\u7684\u9cf6\u5c3e\u82b1\u5206\u985e\u7684\u8cc7\u6599\u96c6\u3002 import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris Sklearn Toy datasets Sklearn \u5957\u4ef6\u4e2d\u63d0\u4f9b\u4e86\u4e03\u500b\u5feb\u901f\u5165\u9580\u7684 Toy datasets \u5f88\u63a8\u85a6\u521d\u5b78\u8005\u53ef\u4ee5\u8f09\u5165\u4f86\u73a9\u73a9\u770b\uff0c\u4e26\u4e14\u7df4\u7fd2\u505a\u8cc7\u6599\u63a2\u7d22\u8207\u5efa\u6a21\u3002\u6bcf\u4e00\u500b\u8cc7\u6599\u96c6\u547c\u53eb\u7684\u65b9\u6cd5\u975e\u5e38\u7c21\u55ae\u3002\u4ee5\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u70ba\u4f8b\uff0c\u6211\u5011\u53ef\u4ee5\u900f\u904e API \u53d6\u5f97\u8f38\u5165\u8207\u8f38\u51fa\u3002 from sklearn.datasets import load_iris iris = load_iris () # \u8f38\u5165\u7279\u5fb5 X = iris . data # \u8f38\u51fa\u7279\u5fb5 y = iris . 
target Sklearn \u63d0\u4f9b\u4e86\u8a31\u591a API \u65b9\u6cd5\u53ef\u4ee5\u547c\u53eb\uff1a data: \u53d6\u5f97\u8f38\u5165\u7279\u5fb5 target: \u53d6\u5f97\u8f38\u51fa\u7279\u5fb5 feature_names: \u53d6\u5f97\u8f38\u5165\u7279\u5fb5\u7684\u540d\u7a31 target_names: \u53d6\u5f97\u8f38\u51fa\u7684\u985e\u5225\u6a19\u7c64(\u5206\u985e\u8cc7\u6599\u96c6) DESCR: \u8cc7\u6599\u96c6\u8a73\u7d30\u63cf\u8ff0 \u5982\u679c\u60f3\u8a66\u8a66\u5176\u4ed6\u7684\u8cc7\u6599\u96c6\u53ef\u4ee5\u53c3\u8003\uff1a \u8ff4\u6b78\u554f\u984c load_boston \u6ce2\u58eb\u9813\u623f\u50f9\u9810\u6e2c load_diabetes \u7cd6\u5c3f\u75c5\u9810\u6e2c load_linnerud \u9ad4\u80fd\u8a55\u4f30\u9810\u6e2c \u5206\u985e\u554f\u984c load_iris \u9cf6\u5c3e\u82b1\u7a2e\u985e\u9810\u6e2c load_digits \u624b\u5beb\u6578\u5b57\u8fa8\u8b58 load_wine \u8461\u8404\u9152\u7a2e\u985e\u9810\u6e2c load_breast_cancer \u4e73\u764c\u9810\u6e2c \u53c3\u8003 \u8f09\u5165\u8cc7\u6599\u96c6 \u9996\u5148\u6211\u5011\u8f09\u5165\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u3002\u70ba\u4e86\u65b9\u4fbf\u5206\u6790\u6211\u5011\u5c07 numpy \u683c\u5f0f\u7684\u8cc7\u6599\u8f49\u63db\u6210 DataFrame \u7684\u683c\u5f0f\u9032\u884c\u8cc7\u6599\u63a2\u7d22\u3002\u56e0\u70ba\u900f\u904e Pandas \u7684 DataFrame \u683c\u5f0f\u6211\u5011\u66f4\u80fd\u7528\u8868\u683c\u7684\u5f62\u5f0f\u89c0\u5bdf\u8cc7\u6599\u3002 iris = load_iris () df_data = pd . DataFrame ( data = np . 
c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data \u76f4\u65b9\u5716 \u76f4\u65b9\u5716\u662f\u4e00\u7a2e\u5c0d\u6578\u64da\u5206\u5e03\u60c5\u6cc1\u7684\u5716\u5f62\u8868\u793a\uff0c\u662f\u4e00\u7a2e\u4e8c\u7dad\u7d71\u8a08\u5716\u8868\u3002\u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb Pandas \u5167\u5efa\u51fd\u5f0f hist() \u9032\u884c\u76f4\u65b9\u5716\u5206\u6790\u3002\u5176\u4e2d\u6211\u5011\u53ef\u4ee5\u8a2d\u5b9a bins(\u7bb1\u6578)\uff0c\u9810\u8a2d\u503c\u70ba 10\u3002\u5982\u679c\u8a2d\u5b9a\u7684\u6578\u91cf\u8d8a\u5927\uff0c\u5176\u4ee3\u8868\u9700\u8981\u5206\u5272\u7684\u7cbe\u5ea6\u8d8a\u7d30\u3002\u901a\u5e38\u53d6\u4e00\u500b\u9069\u7576\u7684\u7bb1\u6578\u5373\u53ef\u89c0\u5bdf\u8a72\u7279\u5fb5\u5728\u8cc7\u6599\u96c6\u4e2d\u7684\u5206\u4f48\u60c5\u6cc1\u3002\u85c9\u7531\u76f4\u65b9\u5716\u6211\u5011\u53ef\u4ee5\u77e5\u9053\u6bcf\u500b\u503c\u57df\u7684\u5206\u4f48\u5927\u5c0f\u8207\u6578\u91cf\u3002\u6211\u5011\u4e5f\u80fd\u767c\u73fe\u8f38\u51fa\u9805\u7684\u985e\u5225\u5171\u6709\u4e09\u500b\uff0c\u4e26\u4e14\u9019\u4e09\u500b\u985e\u5225\u7684\u6578\u91cf\u90fd\u525b\u597d\u5404\u6709 50 \u7b46\u8cc7\u6599\u3002\u6211\u5011\u4e5f\u80fd\u5f97\u77e5\u9019\u4e00\u4efd\u8cc7\u6599\u96c6\u7684\u8f38\u51fa\u985e\u5225\u662f\u4e00\u500b\u975e\u5e38\u5747\u52fb\u7684\u8cc7\u6599\u3002 #\u76f4\u65b9\u5716 histograms df_data . hist ( alpha = 0.6 , layout = ( 3 , 3 ), figsize = ( 12 , 8 ), bins = 10 ) plt . tight_layout () plt . show () \u6211\u5011\u4e5f\u53ef\u4ee5\u900f\u904e Seaborn \u7684 histplot \u505a\u51fa\u66f4\u8a73\u7d30\u7684\u76f4\u65b9\u5716\u5206\u6790\u3002\u4e26\u5229\u7528\u6838\u5bc6\u5ea6\u4f30\u8a08 kde=True \u4f86\u67e5\u770b\u6bcf\u500b\u7279\u5fb5\u7684\u5206\u4f48\u72c0\u6cc1\u3002 fig , axes = plt . subplots ( nrows = 1 , ncols = 4 ) fig . set_size_inches ( 15 , 4 ) sns . 
histplot ( df_data [ \"SepalLengthCm\" ][:], ax = axes [ 0 ], kde = True ) sns . histplot ( df_data [ \"SepalWidthCm\" ][:], ax = axes [ 1 ], kde = True ) sns . histplot ( df_data [ \"PetalLengthCm\" ][:], ax = axes [ 2 ], kde = True ) sns . histplot ( df_data [ \"PetalWidthCm\" ][:], ax = axes [ 3 ], kde = True ) \u6838\u5bc6\u5ea6\u4f30\u8a08 \u6838\u5bc6\u5ea6\u4f30\u8a08\u5206\u7232\u5169\u90e8\u5206\uff0c\u5206\u5225\u6709\u5c0d\u89d2\u7dda\u90e8\u5206\u548c\u975e\u5c0d\u89d2\u7dda\u90e8\u5206\u3002\u5728\u5c0d\u89d2\u7dda\u90e8\u5206\u662f\u4ee5\u6838\u5bc6\u5ea6\u4f30\u8a08\u5716\uff08Kernel Density Estimation\uff09\u7684\u65b9\u5f0f\u5448\u73fe\uff0c\u4e5f\u5c31\u662f\u7528\u4f86\u770b\u67d0\u4e00\u500b\u7279\u5fb5\u7684\u5206\u4f48\u60c5\u6cc1\uff0cx\u8ef8\u5c0d\u61c9\u8457\u8a72\u7279\u5fb5\u7684\u6578\u503c\uff0cy\u8ef8\u5c0d\u61c9\u8457\u8a72\u7279\u5fb5\u7684\u5bc6\u5ea6\u4e5f\u5c31\u662f\u7279\u5fb5\u51fa\u73fe\u7684\u983b\u7387\u3002\u5728\u975e\u5c0d\u89d2\u7dda\u7684\u90e8\u5206\u70ba\u5169\u500b\u7279\u5fb5\u4e4b\u9593\u5206\u4f48\u7684\u95dc\u806f\u6563\u9ede\u5716\u3002\u5c07\u4efb\u610f\u5169\u500b\u7279\u5fb5\u9032\u884c\u914d\u5c0d\uff0c\u4ee5\u5176\u4e2d\u4e00\u500b\u7232\u6a6b\u5ea7\u6a19\uff0c\u53e6\u4e00\u500b\u7232\u7e31\u5ea7\u6a19\uff0c\u5c07\u6240\u6709\u7684\u6578\u64da\u9ede\u7e6a\u88fd\u5728\u5716\u4e0a\uff0c\u7528\u4f86\u8861\u91cf\u5169\u500b\u8b8a\u91cf\u7684\u95dc\u806f\u7a0b\u5ea6\u3002 \u4f7f\u7528 Pandas \u7e6a\u88fd\uff1a from pandas.plotting import scatter_matrix scatter_matrix ( df_data , figsize = ( 10 , 10 ), color = 'b' , diagonal = 'kde' ) \u4f7f\u7528 Seaborn \u7e6a\u88fd\uff1a sns . 
pairplot ( df_data , hue = \"Species\" , height = 2 , diag_kind = \"kde\" ) \u95dc\u806f\u5206\u6790 \u900f\u904e pandas \u7684 corr() \u51fd\u5f0f\u53ef\u4ee5\u5feb\u901f\u7684\u8a08\u7b97\u6bcf\u500b\u7279\u5fb5\u9593\u7684\u5f7c\u6b64\u95dc\u806f\u7a0b\u5ea6\u3002\u5176\u5340\u9593\u503c\u70ba-1~1\u4e4b\u9593\uff0c\u6578\u5b57\u8d8a\u5927\u4ee3\u8868\u95dc\u806f\u7a0b\u5ea6\u6b63\u76f8\u95dc\u8d8a\u9ad8\u3002\u76f8\u53cd\u7684\u7576\u8ca0\u7684\u7a0b\u5ea6\u5f88\u9ad8\u6211\u5011\u53ef\u4ee5\u89e3\u91cb\u9019\u5169\u500b\u7279\u5fb5\u4e4b\u9593\u662f\u6709\u5f88\u9ad8\u7684\u8ca0 \u95dc\u806f\u6027\u3002 # correlation \u8a08\u7b97 corr = df_data [[ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]] . corr () plt . figure ( figsize = ( 8 , 8 )) sns . heatmap ( corr , square = True , annot = True , cmap = \"RdBu_r\" ) \u6563\u4f48\u5716 \u900f\u904e\u6563\u4f48\u5716\u6211\u5011\u53ef\u4ee5\u5f9e\u4e8c\u7dad\u7684\u5e73\u9762\u4e0a\u89c0\u5bdf\u5169\u5169\u7279\u5fb5\u9593\u5f7c\u6b64\u7684\u5206\u4f48\u72c0\u6cc1\u3002\u5982\u679c\u8a72\u7279\u5fb5\u91cd\u8981\u7a0b\u5ea6\u8d8a\u9ad8\uff0c\u7fa4\u805a\u7684\u6548\u679c\u6703\u66f4\u52a0\u986f\u8457\u3002 sns . lmplot ( \"SepalLengthCm\" , \"SepalWidthCm\" , hue = 'Species' , data = df_data , fit_reg = False , legend = False ) plt . 
legend ( title = 'Species' , loc = 'upper right' , labels = [ 'Iris-Setosa' , 'Iris-Versicolour' , 'Iris-Virginica' ]) \u7bb1\u5f62\u5716 \u900f\u904e\u7bb1\u5f62\u5716\u53ef\u4ee5\u5206\u6790\u6bcf\u500b\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u6cc1\u4ee5\u53ca\u662f\u5426\u6709\u96e2\u7fa4\u503c\u3002\u6211\u5011\u5229\u7528\u7bb1\u5f62\u5716\u4f86\u8868\u793a\u56db\u5206\u4f4d\u6578\u4f86\u89c0\u5bdf\u6578\u64da\u5206\u6563\u60c5\u6cc1\u3002\u7bb1\u5f62\u7684\u5169\u7aef\u70ba\u7b2c\u4e00\u500b\u56db\u5206\u4f4d\u6578\u6db5\u84cb25%\u4e4b\u8cc7\u6599(Q1)\u8207\u7b2c\u4e09\u500b\u56db\u5206\u4f4d\u6578\u6db5\u84cb75%\u4e4b\u8cc7\u6599(Q3)\uff0c\u800c\u7bb1\u5f62\u5716\u7684\u4e2d\u9593\u7dda\u70ba\u4e2d\u4f4d\u6578\u986f\u793a\u6db5\u84cb\u524d50%\u8cc7\u6599\u4e4b\u4f4d\u7f6e\u3002\u7bb1\u5f62\u4e0a\u865b\u7dda\u7684\u7aef\u9ede\u70ba\u6975\u5927\u503c\uff0c\u7bb1\u578b\u4e0b\u865b\u7dda\u7684\u9ede\u70ba\u6975\u5c0f\u503c\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 3] \u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce?\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427!"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#day-3","text":"","title":"[Day 3] \u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce?\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427!"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#_1","text":"\u63a2\u7d22\u5f0f\u5206\u6790 (EDA) \u804a\u804a\u4f55\u8b02 EDA\uff0c\u70ba\u4f55\u8981\u505a\u6578\u64da\u5206\u6790? 
\u64b0\u5beb\u7b2c\u4e00\u652f EDA \u7a0b\u5f0f \u900f\u904e\u9cf6\u5c3e\u82b1 (iris) \u8cc7\u6599\u96c6\uff0c\u4f86\u67e5\u770b\u8cc7\u6599\u7684\u5206\u4f48\u72c0\u614b \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#eda","text":"\u63a2\u7d22\u5f0f\u8cc7\u6599\u5206\u6790 (Exploratory Data Analysis, EDA)\uff0c\u4e3b\u8981\u6982\u5ff5\u662f\u5229\u7528\u6578\u64da\u7d71\u8a08\u7684\u65b9\u5f0f\u8996\u89ba\u5316\u8cc7\u6599\u3002\u900f\u904e\u8cc7\u6599\u7684\u63a2\u7d22\u5f0f\u5206\u6790\u53ef\u4ee5\u67e5\u770b\u8cc7\u6599\u96c6\u7576\u4e2d\u6bcf\u500b\u7279\u5fb5\u5f7c\u6b64\u7684\u91cd\u8981\u7a0b\u5ea6\u4ee5\u53ca\u5176\u8cc7\u6599\u5206\u5e03\u72c0\u6cc1\uff0c\u6709\u826f\u597d\u7684\u6578\u64da\u5206\u6790\u7fd2\u6163\u80fd\u5920\u5e6b\u52a9\u4f60\u66f4\u4e86\u89e3\u8cc7\u6599\u96c6\u7684\u7279\u6027\u3002\u53e6\u5916\u505a EDA \u7684\u597d\u8655\u662f\u53ef\u4ee5\u5f9e\u5404\u7a2e\u9762\u5411\u5148\u4e86\u89e3\u8cc7\u6599\u7684\u72c0\u6cc1\uff0c\u4ee5\u5229\u5f8c\u7e8c\u7684\u6a21\u578b\u5206\u6790\u3002","title":"\u63a2\u7d22\u5f0f\u5206\u6790 (EDA)"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#eda_1","text":"\u8cc7\u6599\u8655\u7406 \u2013 Pandas, Numpy Pandas \uff1aPython \u8868\u683c\u8cc7\u6599\u8655\u7406\u7684\u91cd\u8981\u5de5\u5177 Numpy \uff1a\u91dd\u5c0d\u591a\u7dad\u9663\u5217\u7684\u5e73\u884c\u904b\u7b97\u9032\u884c\u512a\u5316\u7684\u5f37\u5927\u51fd\u5f0f\u5eab \u7e6a\u5716\u76f8\u95dc \u2013 Matplotlib, Seaborn Matplotlib \uff1aPython \u6700\u5e38\u88ab\u4f7f\u7528\u5230\u7684\u7e6a\u5716\u5957\u4ef6 Seaborn \uff1a\u4ee5 matplotlib \u70ba\u5e95\u5c64\u7684\u9ad8\u968e\u7e6a\u5716\u5957\u4ef6","title":"EDA 
\u5fc5\u8981\u7684\u5957\u4ef6"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#_2","text":"\u6b64\u8cc7\u6599\u96c6\u7e3d\u5171\u67094\u500b\u8f38\u5165\u7279\u5fb5\u3002\u5206\u5225\u70ba\u82b1\u843c\u9577\u5ea6\u3001\u82b1\u843c\u5bec\u5ea6\u3001\u82b1\u74e3\u9577\u5ea6\u8207\u82b1\u74e3\u5bec\u5ea6\u3002\u8f38\u51fa\u7279\u5fb5\u70ba\u82b1\u6735\u7684\u54c1\u7a2e\uff0c\u5171\u6709\u4e09\u7a2e\u985e\u5225\u5206\u5225\u70ba 0: iris setosa\u3001 1: iris versicolor\u3001 2: iris virginica\u3002","title":"\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u4e00\u89bd"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#_3","text":"\u9996\u5148\u6211\u5011\u8f09\u5165\u8cc7\u6599\u63a2\u7d22\u5f0f\u5206\u6790\u6240\u9700\u7684\u5957\u4ef6\u3002\u5206\u5225\u6709\u9032\u884c\u6578\u64da\u8655\u7406\u7684\u51fd\u5f0f\u5eab\u7684 pandas \u3001\u9ad8\u968e\u5927\u91cf\u7684\u7dad\u5ea6\u9663\u5217\u8207\u77e9\u9663\u904b\u7b97\u7684 numpy \u3001\u8655\u7406\u8cc7\u6599\u8996\u89ba\u5316\u7684\u7e6a\u5716\u5eab matplotlib \u8207 seaborn \u3002\u6700\u5f8c\u4e00\u500b\u662f\u8cc7\u6599\u96c6\u4f86\u6e90\uff0c\u6b64\u7cfb\u5217\u7bc4\u4f8b\u6211\u5011\u63a1\u7528 Sklearn \u6240\u63d0\u4f9b\u7684\u9cf6\u5c3e\u82b1\u5206\u985e\u7684\u8cc7\u6599\u96c6\u3002 import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris","title":"\u8f09\u5165\u5fc5\u8981\u5957\u4ef6"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#sklearn-toy-datasets","text":"Sklearn \u5957\u4ef6\u4e2d\u63d0\u4f9b\u4e86\u4e03\u500b\u5feb\u901f\u5165\u9580\u7684 Toy datasets 
\u5f88\u63a8\u85a6\u521d\u5b78\u8005\u53ef\u4ee5\u8f09\u5165\u4f86\u73a9\u73a9\u770b\uff0c\u4e26\u4e14\u7df4\u7fd2\u505a\u8cc7\u6599\u63a2\u7d22\u8207\u5efa\u6a21\u3002\u6bcf\u4e00\u500b\u8cc7\u6599\u96c6\u547c\u53eb\u7684\u65b9\u6cd5\u975e\u5e38\u7c21\u55ae\u3002\u4ee5\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u70ba\u4f8b\uff0c\u6211\u5011\u53ef\u4ee5\u900f\u904e API \u53d6\u5f97\u8f38\u5165\u8207\u8f38\u51fa\u3002 from sklearn.datasets import load_iris iris = load_iris () # \u8f38\u5165\u7279\u5fb5 X = iris . data # \u8f38\u51fa\u7279\u5fb5 y = iris . target Sklearn \u63d0\u4f9b\u4e86\u8a31\u591a API \u65b9\u6cd5\u53ef\u4ee5\u547c\u53eb\uff1a data: \u53d6\u5f97\u8f38\u5165\u7279\u5fb5 target: \u53d6\u5f97\u8f38\u51fa\u7279\u5fb5 feature_names: \u53d6\u5f97\u8f38\u5165\u7279\u5fb5\u7684\u540d\u7a31 target_names: \u53d6\u5f97\u8f38\u51fa\u7684\u985e\u5225\u6a19\u7c64(\u5206\u985e\u8cc7\u6599\u96c6) DESCR: \u8cc7\u6599\u96c6\u8a73\u7d30\u63cf\u8ff0 \u5982\u679c\u60f3\u8a66\u8a66\u5176\u4ed6\u7684\u8cc7\u6599\u96c6\u53ef\u4ee5\u53c3\u8003\uff1a \u8ff4\u6b78\u554f\u984c load_boston \u6ce2\u58eb\u9813\u623f\u50f9\u9810\u6e2c load_diabetes \u7cd6\u5c3f\u75c5\u9810\u6e2c load_linnerud \u9ad4\u80fd\u8a55\u4f30\u9810\u6e2c \u5206\u985e\u554f\u984c load_iris \u9cf6\u5c3e\u82b1\u7a2e\u985e\u9810\u6e2c load_digits \u624b\u5beb\u6578\u5b57\u8fa8\u8b58 load_wine \u8461\u8404\u9152\u7a2e\u985e\u9810\u6e2c load_breast_cancer \u4e73\u764c\u9810\u6e2c \u53c3\u8003","title":"Sklearn Toy datasets"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#_4","text":"\u9996\u5148\u6211\u5011\u8f09\u5165\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u3002\u70ba\u4e86\u65b9\u4fbf\u5206\u6790\u6211\u5011\u5c07 numpy \u683c\u5f0f\u7684\u8cc7\u6599\u8f49\u63db\u6210 DataFrame \u7684\u683c\u5f0f\u9032\u884c\u8cc7\u6599\u63a2\u7d22\u3002\u56e0\u70ba\u900f\u904e Pandas \u7684 DataFrame 
\u683c\u5f0f\u6211\u5011\u66f4\u80fd\u7528\u8868\u683c\u7684\u5f62\u5f0f\u89c0\u5bdf\u8cc7\u6599\u3002 iris = load_iris () df_data = pd . DataFrame ( data = np . c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data","title":"\u8f09\u5165\u8cc7\u6599\u96c6"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#_5","text":"\u76f4\u65b9\u5716\u662f\u4e00\u7a2e\u5c0d\u6578\u64da\u5206\u5e03\u60c5\u6cc1\u7684\u5716\u5f62\u8868\u793a\uff0c\u662f\u4e00\u7a2e\u4e8c\u7dad\u7d71\u8a08\u5716\u8868\u3002\u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb Pandas \u5167\u5efa\u51fd\u5f0f hist() \u9032\u884c\u76f4\u65b9\u5716\u5206\u6790\u3002\u5176\u4e2d\u6211\u5011\u53ef\u4ee5\u8a2d\u5b9a bins(\u7bb1\u6578)\uff0c\u9810\u8a2d\u503c\u70ba 10\u3002\u5982\u679c\u8a2d\u5b9a\u7684\u6578\u91cf\u8d8a\u5927\uff0c\u5176\u4ee3\u8868\u9700\u8981\u5206\u5272\u7684\u7cbe\u5ea6\u8d8a\u7d30\u3002\u901a\u5e38\u53d6\u4e00\u500b\u9069\u7576\u7684\u7bb1\u6578\u5373\u53ef\u89c0\u5bdf\u8a72\u7279\u5fb5\u5728\u8cc7\u6599\u96c6\u4e2d\u7684\u5206\u4f48\u60c5\u6cc1\u3002\u85c9\u7531\u76f4\u65b9\u5716\u6211\u5011\u53ef\u4ee5\u77e5\u9053\u6bcf\u500b\u503c\u57df\u7684\u5206\u4f48\u5927\u5c0f\u8207\u6578\u91cf\u3002\u6211\u5011\u4e5f\u80fd\u767c\u73fe\u8f38\u51fa\u9805\u7684\u985e\u5225\u5171\u6709\u4e09\u500b\uff0c\u4e26\u4e14\u9019\u4e09\u500b\u985e\u5225\u7684\u6578\u91cf\u90fd\u525b\u597d\u5404\u6709 50 \u7b46\u8cc7\u6599\u3002\u6211\u5011\u4e5f\u80fd\u5f97\u77e5\u9019\u4e00\u4efd\u8cc7\u6599\u96c6\u7684\u8f38\u51fa\u985e\u5225\u662f\u4e00\u500b\u975e\u5e38\u5747\u52fb\u7684\u8cc7\u6599\u3002 #\u76f4\u65b9\u5716 histograms df_data . hist ( alpha = 0.6 , layout = ( 3 , 3 ), figsize = ( 12 , 8 ), bins = 10 ) plt . tight_layout () plt . 
show () \u6211\u5011\u4e5f\u53ef\u4ee5\u900f\u904e Seaborn \u7684 histplot \u505a\u51fa\u66f4\u8a73\u7d30\u7684\u76f4\u65b9\u5716\u5206\u6790\u3002\u4e26\u5229\u7528\u6838\u5bc6\u5ea6\u4f30\u8a08 kde=True \u4f86\u67e5\u770b\u6bcf\u500b\u7279\u5fb5\u7684\u5206\u4f48\u72c0\u6cc1\u3002 fig , axes = plt . subplots ( nrows = 1 , ncols = 4 ) fig . set_size_inches ( 15 , 4 ) sns . histplot ( df_data [ \"SepalLengthCm\" ][:], ax = axes [ 0 ], kde = True ) sns . histplot ( df_data [ \"SepalWidthCm\" ][:], ax = axes [ 1 ], kde = True ) sns . histplot ( df_data [ \"PetalLengthCm\" ][:], ax = axes [ 2 ], kde = True ) sns . histplot ( df_data [ \"PetalWidthCm\" ][:], ax = axes [ 3 ], kde = True )","title":"\u76f4\u65b9\u5716"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#_6","text":"\u6838\u5bc6\u5ea6\u4f30\u8a08\u5206\u7232\u5169\u90e8\u5206\uff0c\u5206\u5225\u6709\u5c0d\u89d2\u7dda\u90e8\u5206\u548c\u975e\u5c0d\u89d2\u7dda\u90e8\u5206\u3002\u5728\u5c0d\u89d2\u7dda\u90e8\u5206\u662f\u4ee5\u6838\u5bc6\u5ea6\u4f30\u8a08\u5716\uff08Kernel Density Estimation\uff09\u7684\u65b9\u5f0f\u5448\u73fe\uff0c\u4e5f\u5c31\u662f\u7528\u4f86\u770b\u67d0\u4e00\u500b\u7279\u5fb5\u7684\u5206\u4f48\u60c5\u6cc1\uff0cx\u8ef8\u5c0d\u61c9\u8457\u8a72\u7279\u5fb5\u7684\u6578\u503c\uff0cy\u8ef8\u5c0d\u61c9\u8457\u8a72\u7279\u5fb5\u7684\u5bc6\u5ea6\u4e5f\u5c31\u662f\u7279\u5fb5\u51fa\u73fe\u7684\u983b\u7387\u3002\u5728\u975e\u5c0d\u89d2\u7dda\u7684\u90e8\u5206\u70ba\u5169\u500b\u7279\u5fb5\u4e4b\u9593\u5206\u4f48\u7684\u95dc\u806f\u6563\u9ede\u5716\u3002\u5c07\u4efb\u610f\u5169\u500b\u7279\u5fb5\u9032\u884c\u914d\u5c0d\uff0c\u4ee5\u5176\u4e2d\u4e00\u500b\u7232\u6a6b\u5ea7\u6a19\uff0c\u53e6\u4e00\u500b\u7232\u7e31\u5ea7\u6a19\uff0c\u5c07\u6240\u6709\u7684\u6578\u64da\u9ede\u7e6a\u88fd\u5728\u5716\u4e0a\uff0c\u7528\u4f86\u8861\u91cf\u5169\u500b\u8b8a\u91cf\u7684\u95dc\u806f\u7a0b\u5ea6\u3002 \u4f7f\u7528 Pandas \u7e6a\u88fd\uff1a from 
pandas.plotting import scatter_matrix scatter_matrix ( df_data , figsize = ( 10 , 10 ), color = 'b' , diagonal = 'kde' ) \u4f7f\u7528 Seaborn \u7e6a\u88fd\uff1a sns . pairplot ( df_data , hue = \"Species\" , height = 2 , diag_kind = \"kde\" )","title":"\u6838\u5bc6\u5ea6\u4f30\u8a08"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#_7","text":"\u900f\u904e pandas \u7684 corr() \u51fd\u5f0f\u53ef\u4ee5\u5feb\u901f\u7684\u8a08\u7b97\u6bcf\u500b\u7279\u5fb5\u9593\u7684\u5f7c\u6b64\u95dc\u806f\u7a0b\u5ea6\u3002\u5176\u5340\u9593\u503c\u70ba-1~1\u4e4b\u9593\uff0c\u6578\u5b57\u8d8a\u5927\u4ee3\u8868\u95dc\u806f\u7a0b\u5ea6\u6b63\u76f8\u95dc\u8d8a\u9ad8\u3002\u76f8\u53cd\u7684\u7576\u8ca0\u7684\u7a0b\u5ea6\u5f88\u9ad8\u6211\u5011\u53ef\u4ee5\u89e3\u91cb\u9019\u5169\u500b\u7279\u5fb5\u4e4b\u9593\u662f\u6709\u5f88\u9ad8\u7684\u8ca0 \u95dc\u806f\u6027\u3002 # correlation \u8a08\u7b97 corr = df_data [[ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]] . corr () plt . figure ( figsize = ( 8 , 8 )) sns . heatmap ( corr , square = True , annot = True , cmap = \"RdBu_r\" )","title":"\u95dc\u806f\u5206\u6790"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#_8","text":"\u900f\u904e\u6563\u4f48\u5716\u6211\u5011\u53ef\u4ee5\u5f9e\u4e8c\u7dad\u7684\u5e73\u9762\u4e0a\u89c0\u5bdf\u5169\u5169\u7279\u5fb5\u9593\u5f7c\u6b64\u7684\u5206\u4f48\u72c0\u6cc1\u3002\u5982\u679c\u8a72\u7279\u5fb5\u91cd\u8981\u7a0b\u5ea6\u8d8a\u9ad8\uff0c\u7fa4\u805a\u7684\u6548\u679c\u6703\u66f4\u52a0\u986f\u8457\u3002 sns . lmplot ( \"SepalLengthCm\" , \"SepalWidthCm\" , hue = 'Species' , data = df_data , fit_reg = False , legend = False ) plt . 
legend ( title = 'Species' , loc = 'upper right' , labels = [ 'Iris-Setosa' , 'Iris-Versicolour' , 'Iris-Virginica' ])","title":"\u6563\u4f48\u5716"},{"location":"3.\u4f60\u771f\u4e86\u89e3\u8cc7\u6599\u55ce\u8a66\u8a66\u770b\u8996\u89ba\u5316\u5206\u6790\u5427/#_9","text":"\u900f\u904e\u7bb1\u5f62\u5716\u53ef\u4ee5\u5206\u6790\u6bcf\u500b\u7279\u5fb5\u7684\u5206\u5e03\u72c0\u6cc1\u4ee5\u53ca\u662f\u5426\u6709\u96e2\u7fa4\u503c\u3002\u6211\u5011\u5229\u7528\u7bb1\u5f62\u5716\u4f86\u8868\u793a\u56db\u5206\u4f4d\u6578\u4f86\u89c0\u5bdf\u6578\u64da\u5206\u6563\u60c5\u6cc1\u3002\u7bb1\u5f62\u7684\u5169\u7aef\u70ba\u7b2c\u4e00\u500b\u56db\u5206\u4f4d\u6578\u6db5\u84cb25%\u4e4b\u8cc7\u6599(Q1)\u8207\u7b2c\u4e09\u500b\u56db\u5206\u4f4d\u6578\u6db5\u84cb75%\u4e4b\u8cc7\u6599(Q3)\uff0c\u800c\u7bb1\u5f62\u5716\u7684\u4e2d\u9593\u7dda\u70ba\u4e2d\u4f4d\u6578\u986f\u793a\u6db5\u84cb\u524d50%\u8cc7\u6599\u4e4b\u4f4d\u7f6e\u3002\u7bb1\u5f62\u4e0a\u865b\u7dda\u7684\u7aef\u9ede\u70ba\u6975\u5927\u503c\uff0c\u7bb1\u578b\u4e0b\u865b\u7dda\u7684\u9ede\u70ba\u6975\u5c0f\u503c\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u7bb1\u5f62\u5716"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/","text":"[Day 30] \u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u52d5\u624b\u90e8\u7f72\u81ea\u5df1\u7684\u6a5f\u5668\u5b78\u7fd2 API \u4f7f\u7528 Heroku \u96f2\u7aef\u5e73\u53f0\u90e8\u7f72\u61c9\u7528\u7a0b\u5f0f \u7bc4\u4f8b\u7a0b\u5f0f\uff1a Code \u524d\u8a00 \u958b\u767c\u7684\u6700\u5f8c\u4e00\u54e9\u8def \u90e8\u7f72\u61c9\u7528 \u3002\u90e8\u7f72 API 
\u5fc5\u9808\u5728\u4e00\u500b\u7a69\u5b9a\u7684\u4f3a\u670d\u5668\u4e0a\u904b\u884c\uff0c\u5927\u591a\u6578\u4f01\u696d\u53ef\u80fd\u6703\u79df\u7528\u96f2\u7aef\u7684\u865b\u64ec\u4f3a\u670d\u5668\u3002\u5e38\u898b\u7684\u96f2\u7aef\u5e73\u53f0\u4e09\u5de8\u982d\u6709 Google Cloud Platform (GCP)\u3001Amazon Web Service (AWS) \u4ee5\u53ca Microsoft Azure\u3002\u4ee5\u4e0a\u4e09\u5bb6\u4f9b\u61c9\u5546\u90fd\u6709\u63d0\u4f9b\u514d\u8cbb\u7684\u8a66\u7528\u984d\u5ea6\u4ee5\u53ca\u90e8\u7f72\u7684\u6559\u5b78\uff0c\u53e6\u5916\u96f2\u7aef\u4f3a\u670d\u5668\u8a08\u8cbb\u7684\u65b9\u5f0f\u662f\u63a1\u7528\u591a\u5c11\u4ed8\u591a\u5c11\u7684\u6982\u5ff5\u6536\u8cbb\u3002\u82e5\u6709 GCP \u4f7f\u7528\u9700\u6c42\u53ef\u4ee5\u53c3\u8003\u6211\u904e\u53bb\u6240\u9304\u88fd\u7684\u7cfb\u5217\u6559\u5b78\u5f71\u7247 GCP\u6559\u5b78-Python \u3002 Heroku \u96f2\u7aef\u5e73\u53f0 Heroku \u662f\u4e00\u500b\u652f\u63f4\u591a\u7a2e\u7a0b\u5f0f\u8a9e\u8a00\u7684\u96f2\u5e73\u53f0\u5373\u670d\u52d9\u3002\u4e26\u4e14\u63d0\u4f9b\u4e00\u500b~~\u514d\u8cbb~~(\u73fe\u5728\u8981\u4ed8\u8cbb\u4e86)\u7684\u96f2\u7aef\u670d\u52d9\uff0c\u9019\u500b\u96f2\u7aef\u5e73\u53f0~~\u4e00\u500b\u5e33\u865f\u53ef\u4ee5\u514d\u8cbb\u5efa\u7acb\u4e94\u500b\u5c08\u6848~~\uff0c\u96d6\u7136\u662f~~\u514d\u8cbb\u7576\u7136\u4e5f\u6709\u4f7f\u7528\u4e0a\u7684\u9650\u5236\u3002\u4f8b\u5982\uff1a(1) \u8d85\u904e 30\u5206\u9418 \u9592\u7f6e\u5c07\u6703\u9032\u5165\u7761\u7720\u72c0\u614b\uff0c\u4e4b\u5f8c\u91cd\u65b0\u555f\u52d5 API \u6642\u6703\u9700\u8981\u7b49\u5f85\u4e00\u4e9b\u6642\u9593\u624d\u6709\u56de\u61c9\u3002(2) 500MB \u7684\u5132\u5b58\u7a7a\u9593\u9650\u5236\u3002\u7576\u7136 Heroku \u4e5f\u63d0\u4f9b\u591a\u7a2e\u8a9e\u8a00\u7684\u90e8\u7f72\u74b0\u5883\u50cf\u662f Ruby\u3001Node.js\u3001PHP\u3001Go\u3001Python ...\u7b49\u3002~~ \u672c\u7bc7\u6587\u7ae0\u6703\u6559\u4f60\u5982\u4f55\u90e8\u7f72 Python \u7684 Flask API\u3002 1. 
\u524d\u7f6e\u4f5c\u696d 1.1) \u7bc4\u4f8b\u7a0b\u5f0f\u78bc \u9019\u4e00\u7bc7\u6587\u7ae0\u5c07\u4ee5\u82b1\u6735\u5206\u985e API \u70ba\u4f8b\uff0c\u62ff\u4e00\u500b\u5148\u5df2\u7d93\u8a13\u7df4\u597d\u7684\u6a21\u578b\u9032\u884c Python Flask API \u7684\u958b\u767c\u8207\u90e8\u7f72\u3002\u81f3\u65bc\u6a21\u578b\u7684\u8a13\u7df4\u548c Flask API \u7684\u8a73\u7d30\u5167\u5bb9\u9019\u908a\u5c31\u4e0d\u7d30\u63d0\uff0c\u82e5\u5404\u4f4d\u60f3\u4e86\u89e3\u7684\u53ef\u4ee5\u53c3\u8003\u6628\u5929\u7684\u5167\u5bb9 [Day 29] \u4f7f\u7528 Python Flask \u67b6\u8a2d API \u5427\uff01 \u3002\u53e6\u5916\u5efa\u8b70\u5927\u5bb6\u53ef\u4ee5\u53c3\u8003\u4e0b\u9762\u9019\u4efd\u7a0b\u5f0f\u78bc\u9032\u884c\u4eca\u5929\u7684\u5167\u5bb9\u5be6\u4f5c\uff0c\u4f7f\u7528 GitHub \u4e26\u5c07\u7a0b\u5f0f fork \u5230\u81ea\u5df1\u7684\u5e33\u865f\u4e2d\u3002 \u7bc4\u4f8b\u7a0b\u5f0f\u78bc \u4ee5\u4e0b\u7c21\u55ae\u8aaa\u660e\u5c08\u6848\u5167\u90e8\u7f72 Heroku \u7684\u91cd\u8981\u6a94\u6848\u3002 1.2 Procfile \u8a2d\u5b9a\u6a94 Procfile \u9019\u500b\u6a94\u6848\u662f\u8981\u544a\u8a34 Heroku \u8981\u5982\u4f55\u555f\u52d5\u9019\u500b web app\uff0c\u5728 Heroku \u88e1\uff0c\u57f7\u884c Python \u8981\u4f7f\u7528 Gunicorn \u4f86\u555f\u52d5 web server\u3002\u6240\u4ee5\u5728 requirements.txt \u88e1\uff0c\u8acb\u8a18\u5f97\u8981\u8f38\u5165 gunicorn\u3002Procfile \u6a94\u6848\uff0c\u7684\u5167\u5bb9\u5982\u4e0b\uff1a web gunicorn run:app 2. 
\u90e8\u7f72 Heroku \u5c08\u6848 2.1 \u5728 Heroku \u5efa\u7acb\u61c9\u7528\u7a0b\u5f0f \u5efa\u7acb\u5e33\u865f\u5f8c\u53f3\u4e0a\u89d2\u300c New \u300d\u4e2d\u7684\u300c Create new app \u300d\u5efa\u7acb\u7b2c\u4e00\u500b\u61c9\u7528\u7a0b\u5f0f\uff1a 2.2 \u5c08\u6848\u8207 GitHub \u9023\u52d5 \u9019\u4e00\u6b65\u9a5f\u662f\u5c07 GitHub \u4e0a\u7684\u5c08\u6848\u76f4\u63a5\u8207 Heroku \u505a\u9023\u52d5\uff0c\u4f60\u4e5f\u53ef\u4ee5\u76f4\u63a5 Fork \u9019\u500b\u5c08\u6848\u76f4\u63a5\u5be6\u4f5c\u3002\u6216\u662f\u4f60\u4e5f\u53ef\u4ee5\u900f\u904e Heroku CLI \u76f4\u63a5\u5c07\u672c\u6a5f\u7684\u7a0b\u5f0f\u78bc\u90e8\u7f72\u5230 Heroku \u4e3b\u6a5f\u4e2d\u3002\u90e8\u7f72\u968e\u6bb5\u883b\u5403\u5927\u5bb6 Git \u7248\u63a7\u7684\u80fd\u529b\uff0c\u57fa\u672c\u7684\u6559\u5b78\u9019\u88e1\u5c31\u4e0d\u8d05\u8ff0\uff0c\u60f3\u4e86\u89e3\u66f4\u591a Git \u6280\u5de7\u53ef\u4ee5 \u53c3\u8003 \u3002 \u9ede\u9078 Enable Automatic Deploys \u9023\u52d5\u5f8c\u53ef\u4ee5\u9078\u64c7\u81ea\u52d5\u90e8\u7f72\u3002\u7576\u4f60 GitHub \u5c08\u6848\u7684\u7a0b\u5f0f\u78bc\u6709\u66f4\u65b0\u6642\u4ed6\u6703\u81ea\u52d5\u5e6b\u4f60\u628a\u66f4\u65b0\u7684\u7a0b\u5f0f\u90e8\u7f72\u5230 Heroku \u4e2d\u3002 \u7531\u65bc\u81ea\u52d5\u66f4\u65b0\u8207\u90e8\u7f72\u6703\u6709\u4e0a\u9650\u6b21\u6578\uff0c\u7576\u4f60\u7684\u5c08\u6848\u5728 GitHub \u66f4\u65b0\u6b21\u6578\u592a\u983b\u7e41\u3002Heroku \u5c31\u6703\u505c\u6b62\u81ea\u52d5\u767c\u5e03\uff0c\u9019\u6642\u5019\u4f60\u4e5f\u53ef\u4ee5\u8a66\u8a66\u624b\u52d5\u90e8\u7f72\u3002 \u90e8\u7f72\u5c08\u6848 \u78ba\u8a8d\u4ee5\u4e0b\u4e8b\u60c5\u90fd\u5b8c\u6210\u5f8c\u5c31\u53ef\u4ee5\u90e8\u7f72\u7a0b\u5f0f\u56c9\uff01\u8a18\u5f97\u6211\u5011\u6709\u8ddf GitHub \u9023\u52d5\uff0c\u7576\u4f60\u7684\u5c08\u6848 git push \u5f8c Heroku \u5c31\u6703\u5e6b\u4f60\u81ea\u52d5\u90e8\u7f72\u4e86\u3002\u4f60\u53ef\u4ee5\u5f9e Activity \u5167\u770b\u5230\u90e8\u7f72\u72c0\u614b\uff0c\u4e5f\u80fd\u5f9e\u53f3\u4e0a\u89d2 More -> View 
logs \u89c0\u770b\u5f8c\u53f0 Log \u8a0a\u606f\u3002\u6216\u8005\u4f60\u4e5f\u53ef\u4ee5\u5f9e Deploy \u5167\u624b\u52d5\u90e8\u7f72\u4e5f\u884c\u3002 Python Flask API \u7a0b\u5f0f\u64b0\u5beb \u2705 \u5c08\u6848\u5167\u5efa\u7acb Procfile \u2705 Heroku \u5efa\u7acb\u5c08\u6848 \u2705 Heroku \u8207 GitHub\u9023\u52d5 \u2705 \u90e8\u7f72\u5b8c\u6210\u5f8c\u4f60\u53ef\u4ee5\u5728 Settings \u5167\u7684 Domains \u770b\u5230\u4f60\u7684\u96f2\u7aef\u9023\u7d50\uff0c\u9019\u500b\u9023\u7d50\u9ede\u4e0b\u53bb\u5c31\u80fd\u770b\u5230\u6211\u5011\u7684API\u56c9\uff01 https://flask-api-example-with-ml-mode.herokuapp.com \u6e2c\u8a66 API \u6628\u5929\u5df2\u7d93\u8ddf\u5927\u5bb6\u4ecb\u7d39 Postman \u7684\u4f7f\u7528\u65b9\u5f0f\u3002\u4eca\u5929\u6211\u5011\u5c31\u4f86\u8a66\u8a66\u90e8\u7f72\u5728\u96f2\u7aef\u4f3a\u670d\u5668\u7684\u7d50\u679c\uff0c\u57fa\u672c\u4e0a\u6e2c\u8a66\u7684\u65b9\u5f0f\u8ddf\u6628\u5929\u5728\u672c\u6a5f\u6e2c\u8a66\u7684\u65b9\u6cd5\u4e00\u6a21\u4e00\u6a23\u3002\u6253\u958b Postman \u9ede\u9078 POST \u4e26\u8cbc\u4e0a API \u7db2\u5740 https://\u5c08\u6848\u540d\u7a31.herokuapp.com/predict \u3002\u4e26\u6a21\u64ec\u524d\u7aef\u4f7f\u7528\u8005\u767c\u9001\u6578\u503c Body -> raw -> JSON \u5c07\u82b1\u6735\u7684\u56db\u500b\u53c3\u6578\u4ee5 JSON \u683c\u5f0f\u50b3\u7d66\u5f8c\u7aef API\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 30] \u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#day-30-heroku-api","text":"","title":"[Day 30] \u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#_1","text":"\u52d5\u624b\u90e8\u7f72\u81ea\u5df1\u7684\u6a5f\u5668\u5b78\u7fd2 API \u4f7f\u7528 Heroku \u96f2\u7aef\u5e73\u53f0\u90e8\u7f72\u61c9\u7528\u7a0b\u5f0f 
\u7bc4\u4f8b\u7a0b\u5f0f\uff1a Code","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#_2","text":"\u958b\u767c\u7684\u6700\u5f8c\u4e00\u54e9\u8def \u90e8\u7f72\u61c9\u7528 \u3002\u90e8\u7f72 API \u5fc5\u9808\u5728\u4e00\u500b\u7a69\u5b9a\u7684\u4f3a\u670d\u5668\u4e0a\u904b\u884c\uff0c\u5927\u591a\u6578\u4f01\u696d\u53ef\u80fd\u6703\u79df\u7528\u96f2\u7aef\u7684\u865b\u64ec\u4f3a\u670d\u5668\u3002\u5e38\u898b\u7684\u96f2\u7aef\u5e73\u53f0\u4e09\u5de8\u982d\u6709 Google Cloud Platform (GCP)\u3001Amazon Web Service (AWS) \u4ee5\u53ca Microsoft Azure\u3002\u4ee5\u4e0a\u4e09\u5bb6\u4f9b\u61c9\u5546\u90fd\u6709\u63d0\u4f9b\u514d\u8cbb\u7684\u8a66\u7528\u984d\u5ea6\u4ee5\u53ca\u90e8\u7f72\u7684\u6559\u5b78\uff0c\u53e6\u5916\u96f2\u7aef\u4f3a\u670d\u5668\u8a08\u8cbb\u7684\u65b9\u5f0f\u662f\u63a1\u7528\u591a\u5c11\u4ed8\u591a\u5c11\u7684\u6982\u5ff5\u6536\u8cbb\u3002\u82e5\u6709 GCP \u4f7f\u7528\u9700\u6c42\u53ef\u4ee5\u53c3\u8003\u6211\u904e\u53bb\u6240\u9304\u88fd\u7684\u7cfb\u5217\u6559\u5b78\u5f71\u7247 GCP\u6559\u5b78-Python \u3002","title":"\u524d\u8a00"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#heroku","text":"Heroku \u662f\u4e00\u500b\u652f\u63f4\u591a\u7a2e\u7a0b\u5f0f\u8a9e\u8a00\u7684\u96f2\u5e73\u53f0\u5373\u670d\u52d9\u3002\u4e26\u4e14\u63d0\u4f9b\u4e00\u500b~~\u514d\u8cbb~~(\u73fe\u5728\u8981\u4ed8\u8cbb\u4e86)\u7684\u96f2\u7aef\u670d\u52d9\uff0c\u9019\u500b\u96f2\u7aef\u5e73\u53f0~~\u4e00\u500b\u5e33\u865f\u53ef\u4ee5\u514d\u8cbb\u5efa\u7acb\u4e94\u500b\u5c08\u6848~~\uff0c\u96d6\u7136\u662f~~\u514d\u8cbb\u7576\u7136\u4e5f\u6709\u4f7f\u7528\u4e0a\u7684\u9650\u5236\u3002\u4f8b\u5982\uff1a(1) \u8d85\u904e 30\u5206\u9418 \u9592\u7f6e\u5c07\u6703\u9032\u5165\u7761\u7720\u72c0\u614b\uff0c\u4e4b\u5f8c\u91cd\u65b0\u555f\u52d5 API \u6642\u6703\u9700\u8981\u7b49\u5f85\u4e00\u4e9b\u6642\u9593\u624d\u6709\u56de\u61c9\u3002(2) 500MB 
\u7684\u5132\u5b58\u7a7a\u9593\u9650\u5236\u3002\u7576\u7136 Heroku \u4e5f\u63d0\u4f9b\u591a\u7a2e\u8a9e\u8a00\u7684\u90e8\u7f72\u74b0\u5883\u50cf\u662f Ruby\u3001Node.js\u3001PHP\u3001Go\u3001Python ...\u7b49\u3002~~ \u672c\u7bc7\u6587\u7ae0\u6703\u6559\u4f60\u5982\u4f55\u90e8\u7f72 Python \u7684 Flask API\u3002","title":"Heroku \u96f2\u7aef\u5e73\u53f0"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#1","text":"","title":"1. \u524d\u7f6e\u4f5c\u696d"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#11","text":"\u9019\u4e00\u7bc7\u6587\u7ae0\u5c07\u4ee5\u82b1\u6735\u5206\u985e API \u70ba\u4f8b\uff0c\u62ff\u4e00\u500b\u5148\u5df2\u7d93\u8a13\u7df4\u597d\u7684\u6a21\u578b\u9032\u884c Python Flask API \u7684\u958b\u767c\u8207\u90e8\u7f72\u3002\u81f3\u65bc\u6a21\u578b\u7684\u8a13\u7df4\u548c Flask API \u7684\u8a73\u7d30\u5167\u5bb9\u9019\u908a\u5c31\u4e0d\u7d30\u63d0\uff0c\u82e5\u5404\u4f4d\u60f3\u4e86\u89e3\u7684\u53ef\u4ee5\u53c3\u8003\u6628\u5929\u7684\u5167\u5bb9 [Day 29] \u4f7f\u7528 Python Flask \u67b6\u8a2d API \u5427\uff01 \u3002\u53e6\u5916\u5efa\u8b70\u5927\u5bb6\u53ef\u4ee5\u53c3\u8003\u4e0b\u9762\u9019\u4efd\u7a0b\u5f0f\u78bc\u9032\u884c\u4eca\u5929\u7684\u5167\u5bb9\u5be6\u4f5c\uff0c\u4f7f\u7528 GitHub \u4e26\u5c07\u7a0b\u5f0f fork \u5230\u81ea\u5df1\u7684\u5e33\u865f\u4e2d\u3002 \u7bc4\u4f8b\u7a0b\u5f0f\u78bc \u4ee5\u4e0b\u7c21\u55ae\u8aaa\u660e\u5c08\u6848\u5167\u90e8\u7f72 Heroku \u7684\u91cd\u8981\u6a94\u6848\u3002","title":"1.1) \u7bc4\u4f8b\u7a0b\u5f0f\u78bc"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#12-procfile","text":"Procfile \u9019\u500b\u6a94\u6848\u662f\u8981\u544a\u8a34 Heroku \u8981\u5982\u4f55\u555f\u52d5\u9019\u500b web app\uff0c\u5728 Heroku \u88e1\uff0c\u57f7\u884c Python \u8981\u4f7f\u7528 Gunicorn \u4f86\u555f\u52d5 web server\u3002\u6240\u4ee5\u5728 requirements.txt \u88e1\uff0c\u8acb\u8a18\u5f97\u8981\u8f38\u5165 gunicorn\u3002Procfile 
\u6a94\u6848\uff0c\u7684\u5167\u5bb9\u5982\u4e0b\uff1a web gunicorn run:app","title":"1.2 Procfile \u8a2d\u5b9a\u6a94"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#2-heroku","text":"","title":"2. \u90e8\u7f72 Heroku \u5c08\u6848"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#21-heroku","text":"\u5efa\u7acb\u5e33\u865f\u5f8c\u53f3\u4e0a\u89d2\u300c New \u300d\u4e2d\u7684\u300c Create new app \u300d\u5efa\u7acb\u7b2c\u4e00\u500b\u61c9\u7528\u7a0b\u5f0f\uff1a","title":"2.1 \u5728 Heroku \u5efa\u7acb\u61c9\u7528\u7a0b\u5f0f"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#22-github","text":"\u9019\u4e00\u6b65\u9a5f\u662f\u5c07 GitHub \u4e0a\u7684\u5c08\u6848\u76f4\u63a5\u8207 Heroku \u505a\u9023\u52d5\uff0c\u4f60\u4e5f\u53ef\u4ee5\u76f4\u63a5 Fork \u9019\u500b\u5c08\u6848\u76f4\u63a5\u5be6\u4f5c\u3002\u6216\u662f\u4f60\u4e5f\u53ef\u4ee5\u900f\u904e Heroku CLI \u76f4\u63a5\u5c07\u672c\u6a5f\u7684\u7a0b\u5f0f\u78bc\u90e8\u7f72\u5230 Heroku \u4e3b\u6a5f\u4e2d\u3002\u90e8\u7f72\u968e\u6bb5\u883b\u5403\u5927\u5bb6 Git \u7248\u63a7\u7684\u80fd\u529b\uff0c\u57fa\u672c\u7684\u6559\u5b78\u9019\u88e1\u5c31\u4e0d\u8d05\u8ff0\uff0c\u60f3\u4e86\u89e3\u66f4\u591a Git \u6280\u5de7\u53ef\u4ee5 \u53c3\u8003 \u3002 \u9ede\u9078 Enable Automatic Deploys \u9023\u52d5\u5f8c\u53ef\u4ee5\u9078\u64c7\u81ea\u52d5\u90e8\u7f72\u3002\u7576\u4f60 GitHub \u5c08\u6848\u7684\u7a0b\u5f0f\u78bc\u6709\u66f4\u65b0\u6642\u4ed6\u6703\u81ea\u52d5\u5e6b\u4f60\u628a\u66f4\u65b0\u7684\u7a0b\u5f0f\u90e8\u7f72\u5230 Heroku \u4e2d\u3002 \u7531\u65bc\u81ea\u52d5\u66f4\u65b0\u8207\u90e8\u7f72\u6703\u6709\u4e0a\u9650\u6b21\u6578\uff0c\u7576\u4f60\u7684\u5c08\u6848\u5728 GitHub \u66f4\u65b0\u6b21\u6578\u592a\u983b\u7e41\u3002Heroku \u5c31\u6703\u505c\u6b62\u81ea\u52d5\u767c\u5e03\uff0c\u9019\u6642\u5019\u4f60\u4e5f\u53ef\u4ee5\u8a66\u8a66\u624b\u52d5\u90e8\u7f72\u3002","title":"2.2 \u5c08\u6848\u8207 GitHub 
\u9023\u52d5"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#_3","text":"\u78ba\u8a8d\u4ee5\u4e0b\u4e8b\u60c5\u90fd\u5b8c\u6210\u5f8c\u5c31\u53ef\u4ee5\u90e8\u7f72\u7a0b\u5f0f\u56c9\uff01\u8a18\u5f97\u6211\u5011\u6709\u8ddf GitHub \u9023\u52d5\uff0c\u7576\u4f60\u7684\u5c08\u6848 git push \u5f8c Heroku \u5c31\u6703\u5e6b\u4f60\u81ea\u52d5\u90e8\u7f72\u4e86\u3002\u4f60\u53ef\u4ee5\u5f9e Activity \u5167\u770b\u5230\u90e8\u7f72\u72c0\u614b\uff0c\u4e5f\u80fd\u5f9e\u53f3\u4e0a\u89d2 More -> View logs \u89c0\u770b\u5f8c\u53f0 Log \u8a0a\u606f\u3002\u6216\u8005\u4f60\u4e5f\u53ef\u4ee5\u5f9e Deploy \u5167\u624b\u52d5\u90e8\u7f72\u4e5f\u884c\u3002 Python Flask API \u7a0b\u5f0f\u64b0\u5beb \u2705 \u5c08\u6848\u5167\u5efa\u7acb Procfile \u2705 Heroku \u5efa\u7acb\u5c08\u6848 \u2705 Heroku \u8207 GitHub\u9023\u52d5 \u2705 \u90e8\u7f72\u5b8c\u6210\u5f8c\u4f60\u53ef\u4ee5\u5728 Settings \u5167\u7684 Domains \u770b\u5230\u4f60\u7684\u96f2\u7aef\u9023\u7d50\uff0c\u9019\u500b\u9023\u7d50\u9ede\u4e0b\u53bb\u5c31\u80fd\u770b\u5230\u6211\u5011\u7684API\u56c9\uff01 https://flask-api-example-with-ml-mode.herokuapp.com","title":"\u90e8\u7f72\u5c08\u6848"},{"location":"30.\u4f7f\u7528 Heroku \u90e8\u7f72\u6a5f\u5668\u5b78\u7fd2 API/#api","text":"\u6628\u5929\u5df2\u7d93\u8ddf\u5927\u5bb6\u4ecb\u7d39 Postman \u7684\u4f7f\u7528\u65b9\u5f0f\u3002\u4eca\u5929\u6211\u5011\u5c31\u4f86\u8a66\u8a66\u90e8\u7f72\u5728\u96f2\u7aef\u4f3a\u670d\u5668\u7684\u7d50\u679c\uff0c\u57fa\u672c\u4e0a\u6e2c\u8a66\u7684\u65b9\u5f0f\u8ddf\u6628\u5929\u5728\u672c\u6a5f\u6e2c\u8a66\u7684\u65b9\u6cd5\u4e00\u6a21\u4e00\u6a23\u3002\u6253\u958b Postman \u9ede\u9078 POST \u4e26\u8cbc\u4e0a API \u7db2\u5740 https://\u5c08\u6848\u540d\u7a31.herokuapp.com/predict \u3002\u4e26\u6a21\u64ec\u524d\u7aef\u4f7f\u7528\u8005\u767c\u9001\u6578\u503c Body -> raw -> JSON \u5c07\u82b1\u6735\u7684\u56db\u500b\u53c3\u6578\u4ee5 JSON \u683c\u5f0f\u50b3\u7d66\u5f8c\u7aef API\u3002 
\u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u6e2c\u8a66 API"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/","text":"[Day 4] \u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406 \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u8cc7\u6599\u5982\u4f55\u6e05\u7406 \u4ec0\u9ebc\u662f\u8cc7\u6599\u6e05\u7406\uff1f \u8cc7\u6599\u524d\u8655\u7406\u7684\u65b9\u5f0f \u70ba\u4ec0\u9ebc\u8cc7\u6599\u8981\u524d\u8655\u7406\u5462\uff1f\u524d\u8655\u88e1\u6709\u4f55\u597d\u8655\uff1f \u5b78\u7fd2 Sklearn \u4e2d\u56db\u7a2e\u4e0d\u540c\u8cc7\u6599\u524d\u8655\u7406\u65b9\u5f0f StandardScaler (\u5e73\u5747\u503c\u548c\u6a19\u6e96\u5dee) MinMaxScaler(\u6700\u5c0f\u6700\u5927\u503c\u6a19\u6e96\u5316) MaxAbsScaler\uff08\u7d55\u5c0d\u503c\u6700\u5927\u6a19\u6e96\u5316\uff09 RobustScaler \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u524d\u8a00 \u5f88\u591a\u6f14\u7b97\u6cd5\u5c0d\u6578\u64da\u7bc4\u570d\u975e\u5e38\u7684\u654f\u611f\u3002\u56e0\u6b64\u70ba\u4e86\u8981\u8b93\u6a21\u578b\u8a13\u7df4\u7684\u66f4\u5f37\u5927\uff0c\u901a\u5e38\u7684\u505a\u6cd5\u662f\u5c0d\u7279\u5fb5\u9032\u884c\u8abf\u7bc0\uff0c\u4f7f\u5f97\u6578\u64da\u66f4\u9069\u5408\u9019\u4e9b\u6f14\u7b97\u6cd5\u3002\u4e00\u822c\u4f86\u8aaa\uff0c\u6211\u5011\u5728\u505a\u6a5f\u5668\u5b78\u7fd2\u6642\u5f80\u5f80\u6703\u505a\u7279\u5fb5\u7684\u6b63\u898f\u5316\u3002 \u8f09\u5165\u76f8\u95dc\u5957\u4ef6 import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris np . set_printoptions ( suppress = True ) 1) \u8f09\u5165\u8cc7\u6599\u96c6 \u4eca\u5929\u7684\u7bc4\u4f8b\u6211\u5011\u5ef6\u7e8c\u6628\u5929\u7684\u4f8b\u5b50\uff0c\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u9032\u884c\u8cc7\u6599\u6b63\u898f\u5316\u7684\u793a\u7bc4\u3002 iris = load_iris () df_data = pd . DataFrame ( data = np . 
c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data 2) \u6aa2\u67e5\u7f3a\u5931\u503c \u4f7f\u7528 numpy \u6240\u63d0\u4f9b\u7684\u51fd\u5f0f\u4f86\u6aa2\u67e5\u662f\u5426\u6709 NA \u7f3a\u5931\u503c\uff0c\u5047\u8a2d\u6709\u7f3a\u5931\u503c\u4f7f\u7528 dropna() \u4f86\u79fb\u9664\u3002\u4f7f\u7528\u7684\u6642\u6a5f\u5728\u65bc\u7576\u53ea\u6709\u5c11\u91cf\u7684\u7f3a\u5931\u503c\u9069\u7528\uff0c\u82e5\u9047\u5230\u6709\u5927\u91cf\u7f3a\u5931\u503c\u7684\u60c5\u6cc1\uff0c\u6216\u662f\u672c\u8eab\u7684\u8cc7\u6599\u91cf\u5c31\u5f88\u5c11\u7684\u60c5\u6cc1\u4e0b\u5efa\u8b70\u53ef\u4ee5\u900f\u904e\u6a5f\u5668\u5b78\u7fd2\u7684\u65b9\u6cd5\u88dc\u503c\u4f86\u9810\u6e2c\u7f3a\u5931\u503c\u3002 X = df_data . drop ( labels = [ 'Species' ], axis = 1 ) . values # \u79fb\u9664Species\u4e26\u53d6\u5f97\u5269\u4e0b\u6b04\u4f4d\u8cc7\u6599 y = df_data [ 'Species' ] # checked missing data print ( \"checked missing data(NAN mount):\" , len ( np . where ( np . 
isnan ( X ))[ 0 ])) \u8f38\u51fa\u7d50\u679c\uff1a checked missing data(NAN mount): 0 \u7531\u65bc Sklearn \u6240\u63d0\u4f9b\u7684\u8cc7\u6599\u96c6\u975e\u5e38\u4e7e\u6de8\uff0c\u82e5\u4f60\u6536\u96c6\u5230\u7684\u8cc7\u6599\u6709\u8a31\u591a\u7684\u7f3a\u5931\u503c\u6216\u662f\u672c\u8eab\u8cc7\u6599\u91cf\u5c31\u4e0d\u591a\u7684\u5f37\u6cc1\u4e0b\uff0c\u5efa\u8b70\u597d\u597d\u7684\u53bb\u8655\u7406\u9019\u4e9b\u7f3a\u6f0f\u7684\u503c\u3002\u901a\u5e38\u88dc\u503c\u7684\u65b9\u6cd5\u53ef\u5206\u70ba\u624b\u52d5\u586b\u503c\u8207\u63d2\u503c\u6cd5\u3002\u9996\u5148\u624b\u52d5\u586b\u503c\u53ef\u4ee5\u4ee5\u8a72\u6b04\u4f4d\u6240\u6709\u8cc7\u6599\u7684\u7b97\u8853\u5e73\u5747\u6578\u6216\u4e2d\u4f4d\u6578\u505a\u586b\u88dc\u7684\u4f9d\u64da\u3002\u518d\u8005\u4f7f\u7528\u4ee5\u51fa\u73fe\u983b\u7387\u6700\u9ad8\u7684\u503c\u505a\u586b\u88dc\u4e5f\u662f\u5e38\u898b\u7684\u88dc\u503c\u65b9\u5f0f\u3002\u53e6\u4e00\u7a2e\u5dee\u503c\u6cd5\u662f\u900f\u904e\u6642\u9593\u6216\u7a7a\u9593\u4e0a\u7684\u6280\u5de7\u8655\u7406\u9019\u4e9b\u7f3a\u503c\uff0c\u4f8b\u5982\u7576\u8cc7\u6599\u662f\u6709\u6642\u9593\u5e8f\u5217\u7684\u56e0\u7d20\u5b58\u5728\u6642\uff0c\u53ef\u4ee5\u5229\u7528\u8a72\u7b46\u7f3a\u5931\u6b04\u4f4d\u9644\u8fd1\u7684\u6642\u9593\u9ede\u7684\u8cc7\u6599\u52a0\u7e3d\u4e26\u5e73\u5747\u3002 3) \u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6 \u6211\u5011\u900f\u904e Sklearn \u6240\u63d0\u4f9b\u7684 train_test_split() \u65b9\u6cd5\u4f86\u70ba\u6211\u5011\u7684\u8cc7\u6599\u9032\u884c\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u5207\u5272\u3002\u5728\u6b64\u65b9\u6cd5\u4e2d\u6211\u5011\u53ef\u4ee5\u8a2d\u5b9a\u4e00\u4e9b\u53c3\u6578\u4f86\u8b93\u6211\u5011\u5207\u5272\u7684\u8cc7\u6599\u66f4\u591a\u6a23\u6027\u3002\u5176\u4e2d test_size \u53c3\u6578\u5c31\u662f\u8a2d\u5b9a\u6e2c\u8a66\u96c6\u7684\u6bd4\u4f8b\uff0c\u7bc4\u4f8b\u4e2d\u6211\u5011\u8a2d\u5b9a 0.3 \u5373\u4ee3\u8868\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u6bd4\u4f8b\u70ba 
7:3\u3002\u53e6\u5916\u9810\u8a2d\u8cc7\u6599\u5207\u5272\u7684\u65b9\u5f0f\u662f\u96a8\u6a5f\u5207\u5272 shuffle=True \u5c0d\u539f\u59cb\u6578\u64da\u9032\u884c\u96a8\u6a5f\u62bd\u6a23\uff0c\u4ee5\u4fdd\u8b49\u96a8\u6a5f\u6027\u3002\u82e5\u60f3\u8981\u6bcf\u6b21\u7a0b\u5f0f\u57f7\u884c\u6642\u5207\u5272\u7d50\u679c\u90fd\u662f\u4e00\u6a23\u7684\u53ef\u4ee5\u8a2d\u5b9a\u4e82\u6578\u96a8\u6a5f\u7a2e\u5b50 random_state \u4e26\u7d66\u4e88\u4e00\u500b\u96a8\u6a5f\u6578\u503c\u3002\u6700\u5f8c\u4e00\u500b\u662f stratify \u5206\u5c64\u96a8\u6a5f\u62bd\u6a23\uff0c\u7279\u5225\u662f\u5728\u539f\u59cb\u6578\u64da\u4e2d\u6a23\u672c\u6a19\u7c64\u5206\u4f48\u4e0d\u5747\u8861\u6642\u975e\u5e38\u6709\u7528\u3002\u4f7f\u7528\u6642\u6a5f\u662f\u78ba\u4fdd\u5206\u985e\u554f\u984c y \u7684\u985e\u5225\u6578\u91cf\u5206\u4f48\u8981\u8207\u539f\u8cc7\u6599\u96c6\u4e00\u81f4\u3002\u4ee5\u514d\u8cc7\u6599\u96c6\u5207\u5272\u4e0d\u5e73\u5747\u5c0e\u81f4\u6a21\u578b\u8a13\u7df4\u6642\u6709\u5f88\u5927\u7684\u504f\u5dee\u3002 from sklearn.model_selection import train_test_split X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.3 , random_state = 42 , stratify = y ) print ( 'train shape:' , X_train . shape ) print ( 'test shape:' , X_test . shape ) \u8f38\u51fa\u7d50\u679c\uff1a train shape: (105, 4) test shape: (45, 4) Standardization\u5e73\u5747&\u8b8a\u7570\u6578\u6a19\u6e96\u5316 \u5c07\u6240\u6709\u7279\u5fb5\u6a19\u6e96\u5316\uff0c\u4e5f\u5c31\u662f\u9ad8\u65af\u5206\u4f48\u3002\u4f7f\u5f97\u6578\u64da\u7684\u5e73\u5747\u503c\u70ba 0\uff0c\u65b9\u5dee\u70ba 1\u3002\u9069\u5408\u7684\u4f7f\u7528\u6642\u6a5f\u65bc\u7576\u6709\u4e9b\u7279\u5fb5\u7684\u65b9\u5dee\u904e\u5927\u6642\uff0c\u4f7f\u7528\u6a19\u6e96\u5316\u80fd\u5920\u6709\u6548\u5730\u8b93\u6a21\u578b\u5feb\u901f\u6536\u6582\u3002 from sklearn.preprocessing import StandardScaler scaler = StandardScaler () X_train_scaled = scaler . 
fit_transform ( X_train ) # scaled\u4e4b\u5f8c\u7684\u8cc7\u6599\u96f6\u5747\u503c\uff0c\u55ae\u4f4d\u65b9\u5dee print ( '\u8cc7\u6599\u96c6 X \u7684\u5e73\u5747\u503c : ' , X_train . mean ( axis = 0 )) print ( '\u8cc7\u6599\u96c6 X \u7684\u6a19\u6e96\u5dee : ' , X_train . std ( axis = 0 )) print ( ' \\n StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u5e73\u5747\u503c : ' , X_train_scaled . mean ( axis = 0 )) print ( 'StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6a19\u6e96\u5dee : ' , X_train_scaled . std ( axis = 0 )) \u8f38\u51fa\u7d50\u679c\uff1a \u8cc7\u6599\u96c6 X \u7684\u5e73\u5747\u503c : [5.87333333 3.0552381 3.7847619 1.20571429] \u8cc7\u6599\u96c6 X \u7684\u6a19\u6e96\u5dee : [0.85882164 0.45502087 1.77553646 0.77383751] StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u5e73\u5747\u503c : [ 0. -0. -0. -0.] StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6 X \u7684\u6a19\u6e96\u5dee : [1. 1. 1. 1.] \u8a13\u7df4\u96c6\u7684 Scaler \u64ec\u5408\u5b8c\u6210\u5f8c\uff0c\u6211\u5011\u5c31\u80fd\u505a\u76f8\u540c\u7684\u8f49\u63db\u5728\u6e2c\u8a66\u96c6\u4e0a\u3002 X_test_scaled = scaler . transform ( X_test ) print ( ' \\n StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u5e73\u5747\u503c : ' , X_test_scaled . mean ( axis = 0 )) print ( 'StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6a19\u6e96\u5dee : ' , X_test_scaled . 
std ( axis = 0 )) \u8f38\u51fa\u7d50\u679c\uff1a StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u5e73\u5747\u503c : [0.40925926 0.44259259 0.44750958 0.45185185] StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6a19\u6e96\u5dee : [0.20457725 0.15915694 0.29647499 0.30224923] \u5982\u679c\u60f3\u5c07\u8f49\u63db\u5f8c\u7684\u8cc7\u6599\u9084\u539f\u53ef\u4ee5\u4f7f\u7528 inverse_transform() \u5c07\u6578\u503c\u9084\u539f\u6210\u539f\u672c\u7684\u8f38\u5165\u3002 # \u5c07\u7e2e\u653e\u7684\u8cc7\u6599\u9084\u539f X_test_inverse = scaler . inverse_transform ( X_test_scaled ) MinMaxScaler\u6700\u5c0f\u6700\u5927\u503c\u6a19\u6e96\u5316 \u5728 MinMaxScaler \u4e2d\u662f\u7d66\u5b9a\u4e86\u4e00\u500b\u660e\u78ba\u7684\u6700\u5927\u503c\u8207\u6700\u5c0f\u503c\u3002\u6bcf\u500b\u7279\u5fb5\u4e2d\u7684\u6700\u5c0f\u503c\u8b8a\u6210\u4e860\uff0c\u6700\u5927\u503c\u8b8a\u6210\u4e861\u3002\u6578\u64da\u6703\u7e2e\u653e\u5230\u5230[0,1]\u4e4b\u9593\u3002 from sklearn.preprocessing import MinMaxScaler scaler = MinMaxScaler () X_train_scaled = scaler . fit_transform ( X_train ) # scaled \u4e4b\u5f8c\u7684\u8cc7\u6599\u6700\u5c0f\u503c\u3001\u6700\u5927\u503c print ( '\u8cc7\u6599\u96c6 X \u7684\u6700\u5c0f\u503c : ' , X_train . min ( axis = 0 )) print ( '\u8cc7\u6599\u96c6 X \u7684\u6700\u5927\u503c : ' , X_train . max ( axis = 0 )) print ( ' \\n StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6700\u5c0f\u503c : ' , X_train_scaled . min ( axis = 0 )) print ( 'StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6700\u5927\u503c : ' , X_train_scaled . max ( axis = 0 )) \u8f38\u51fa\u7d50\u679c\uff1a \u8cc7\u6599\u96c6 X \u7684\u6700\u5c0f\u503c : [4.3 2. 1.1 0.1] \u8cc7\u6599\u96c6 X \u7684\u6700\u5927\u503c : [7.9 4.4 6.9 2.5] StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6700\u5c0f\u503c : [0. 0. 0. 0.] StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6700\u5927\u503c : [1. 1. 1. 1.] 
X_test_scaled = scaler . transform ( X_test ) print ( ' \\n StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6700\u5c0f\u503c : ' , X_test_scaled . min ( axis = 0 )) print ( 'StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6700\u5927\u503c : ' , X_test_scaled . max ( axis = 0 )) StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6700\u5c0f\u503c : [ 0.02777778 0.125 -0.01724138 0.04166667] StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6700\u5927\u503c : [0.83333333 0.83333333 0.89655172 0.95833333] MaxAbsScaler\u7d55\u5c0d\u503c\u6700\u5927\u6a19\u6e96\u5316 MaxAbsScaler \u8207 MinMaxScaler \u985e\u4f3c\uff0c\u6240\u6709\u6578\u64da\u90fd\u6703\u9664\u4ee5\u8a72\u5217\u7d55\u5c0d\u503c\u5f8c\u7684\u6700\u5927\u503c\u3002 \u6578\u64da\u6703\u7e2e\u653e\u5230\u5230[-1,1]\u4e4b\u9593\u3002 from sklearn.preprocessing import MaxAbsScaler scaler = MaxAbsScaler () . fit ( X ) X_scaled = scaler . transform ( X ) X_test_scaled = scaler . transform ( X_test ) RobustScaler \u53ef\u4ee5\u6709\u6548\u7684\u7e2e\u653e\u5e36\u6709 outlier \u7684\u6578\u64da\uff0c\u900f\u904e Robust \u5982\u679c\u6578\u64da\u4e2d\u542b\u6709\u7570\u5e38\u503c\u5728\u7e2e\u653e\u4e2d\u6703\u6368\u53bb\u3002 from sklearn.preprocessing import RobustScaler scaler = RobustScaler () . fit ( X ) X_scaled = scaler . 
transform ( X ) X_test_scaled = scaler.transform(X_test) \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 4] \u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#day-4","text":"","title":"[Day 4] \u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#_1","text":"\u8cc7\u6599\u5982\u4f55\u6e05\u7406 \u4ec0\u9ebc\u662f\u8cc7\u6599\u6e05\u7406\uff1f \u8cc7\u6599\u524d\u8655\u7406\u7684\u65b9\u5f0f \u70ba\u4ec0\u9ebc\u8cc7\u6599\u8981\u524d\u8655\u7406\u5462\uff1f\u524d\u8655\u88e1\u6709\u4f55\u597d\u8655\uff1f \u5b78\u7fd2 Sklearn \u4e2d\u56db\u7a2e\u4e0d\u540c\u8cc7\u6599\u524d\u8655\u7406\u65b9\u5f0f StandardScaler (\u5e73\u5747\u503c\u548c\u6a19\u6e96\u5dee) MinMaxScaler(\u6700\u5c0f\u6700\u5927\u503c\u6a19\u6e96\u5316) MaxAbsScaler\uff08\u7d55\u5c0d\u503c\u6700\u5927\u6a19\u6e96\u5316\uff09 RobustScaler \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#_2","text":"\u5f88\u591a\u6f14\u7b97\u6cd5\u5c0d\u6578\u64da\u7bc4\u570d\u975e\u5e38\u7684\u654f\u611f\u3002\u56e0\u6b64\u70ba\u4e86\u8981\u8b93\u6a21\u578b\u8a13\u7df4\u7684\u66f4\u5f37\u5927\uff0c\u901a\u5e38\u7684\u505a\u6cd5\u662f\u5c0d\u7279\u5fb5\u9032\u884c\u8abf\u7bc0\uff0c\u4f7f\u5f97\u6578\u64da\u66f4\u9069\u5408\u9019\u4e9b\u6f14\u7b97\u6cd5\u3002\u4e00\u822c\u4f86\u8aaa\uff0c\u6211\u5011\u5728\u505a\u6a5f\u5668\u5b78\u7fd2\u6642\u5f80\u5f80\u6703\u505a\u7279\u5fb5\u7684\u6b63\u898f\u5316\u3002","title":"\u524d\u8a00"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#_3","text":"import pandas as pd 
import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris np . set_printoptions ( suppress = True )","title":"\u8f09\u5165\u76f8\u95dc\u5957\u4ef6"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#1","text":"\u4eca\u5929\u7684\u7bc4\u4f8b\u6211\u5011\u5ef6\u7e8c\u6628\u5929\u7684\u4f8b\u5b50\uff0c\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u9032\u884c\u8cc7\u6599\u6b63\u898f\u5316\u7684\u793a\u7bc4\u3002 iris = load_iris () df_data = pd . DataFrame ( data = np . c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data","title":"1) \u8f09\u5165\u8cc7\u6599\u96c6"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#2","text":"\u4f7f\u7528 numpy \u6240\u63d0\u4f9b\u7684\u51fd\u5f0f\u4f86\u6aa2\u67e5\u662f\u5426\u6709 NA \u7f3a\u5931\u503c\uff0c\u5047\u8a2d\u6709\u7f3a\u5931\u503c\u4f7f\u7528 dropna() \u4f86\u79fb\u9664\u3002\u4f7f\u7528\u7684\u6642\u6a5f\u5728\u65bc\u7576\u53ea\u6709\u5c11\u91cf\u7684\u7f3a\u5931\u503c\u9069\u7528\uff0c\u82e5\u9047\u5230\u6709\u5927\u91cf\u7f3a\u5931\u503c\u7684\u60c5\u6cc1\uff0c\u6216\u662f\u672c\u8eab\u7684\u8cc7\u6599\u91cf\u5c31\u5f88\u5c11\u7684\u60c5\u6cc1\u4e0b\u5efa\u8b70\u53ef\u4ee5\u900f\u904e\u6a5f\u5668\u5b78\u7fd2\u7684\u65b9\u6cd5\u88dc\u503c\u4f86\u9810\u6e2c\u7f3a\u5931\u503c\u3002 X = df_data . drop ( labels = [ 'Species' ], axis = 1 ) . values # \u79fb\u9664Species\u4e26\u53d6\u5f97\u5269\u4e0b\u6b04\u4f4d\u8cc7\u6599 y = df_data [ 'Species' ] # checked missing data print ( \"checked missing data(NAN mount):\" , len ( np . where ( np . 
isnan ( X ))[ 0 ])) \u8f38\u51fa\u7d50\u679c\uff1a checked missing data(NAN mount): 0 \u7531\u65bc Sklearn \u6240\u63d0\u4f9b\u7684\u8cc7\u6599\u96c6\u975e\u5e38\u4e7e\u6de8\uff0c\u82e5\u4f60\u6536\u96c6\u5230\u7684\u8cc7\u6599\u6709\u8a31\u591a\u7684\u7f3a\u5931\u503c\u6216\u662f\u672c\u8eab\u8cc7\u6599\u91cf\u5c31\u4e0d\u591a\u7684\u5f37\u6cc1\u4e0b\uff0c\u5efa\u8b70\u597d\u597d\u7684\u53bb\u8655\u7406\u9019\u4e9b\u7f3a\u6f0f\u7684\u503c\u3002\u901a\u5e38\u88dc\u503c\u7684\u65b9\u6cd5\u53ef\u5206\u70ba\u624b\u52d5\u586b\u503c\u8207\u63d2\u503c\u6cd5\u3002\u9996\u5148\u624b\u52d5\u586b\u503c\u53ef\u4ee5\u4ee5\u8a72\u6b04\u4f4d\u6240\u6709\u8cc7\u6599\u7684\u7b97\u8853\u5e73\u5747\u6578\u6216\u4e2d\u4f4d\u6578\u505a\u586b\u88dc\u7684\u4f9d\u64da\u3002\u518d\u8005\u4f7f\u7528\u4ee5\u51fa\u73fe\u983b\u7387\u6700\u9ad8\u7684\u503c\u505a\u586b\u88dc\u4e5f\u662f\u5e38\u898b\u7684\u88dc\u503c\u65b9\u5f0f\u3002\u53e6\u4e00\u7a2e\u5dee\u503c\u6cd5\u662f\u900f\u904e\u6642\u9593\u6216\u7a7a\u9593\u4e0a\u7684\u6280\u5de7\u8655\u7406\u9019\u4e9b\u7f3a\u503c\uff0c\u4f8b\u5982\u7576\u8cc7\u6599\u662f\u6709\u6642\u9593\u5e8f\u5217\u7684\u56e0\u7d20\u5b58\u5728\u6642\uff0c\u53ef\u4ee5\u5229\u7528\u8a72\u7b46\u7f3a\u5931\u6b04\u4f4d\u9644\u8fd1\u7684\u6642\u9593\u9ede\u7684\u8cc7\u6599\u52a0\u7e3d\u4e26\u5e73\u5747\u3002","title":"2) \u6aa2\u67e5\u7f3a\u5931\u503c"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#3","text":"\u6211\u5011\u900f\u904e Sklearn \u6240\u63d0\u4f9b\u7684 train_test_split() \u65b9\u6cd5\u4f86\u70ba\u6211\u5011\u7684\u8cc7\u6599\u9032\u884c\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u5207\u5272\u3002\u5728\u6b64\u65b9\u6cd5\u4e2d\u6211\u5011\u53ef\u4ee5\u8a2d\u5b9a\u4e00\u4e9b\u53c3\u6578\u4f86\u8b93\u6211\u5011\u5207\u5272\u7684\u8cc7\u6599\u66f4\u591a\u6a23\u6027\u3002\u5176\u4e2d test_size 
\u53c3\u6578\u5c31\u662f\u8a2d\u5b9a\u6e2c\u8a66\u96c6\u7684\u6bd4\u4f8b\uff0c\u7bc4\u4f8b\u4e2d\u6211\u5011\u8a2d\u5b9a 0.3 \u5373\u4ee3\u8868\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684\u6bd4\u4f8b\u70ba 7:3\u3002\u53e6\u5916\u9810\u8a2d\u8cc7\u6599\u5207\u5272\u7684\u65b9\u5f0f\u662f\u96a8\u6a5f\u5207\u5272 shuffle=True \u5c0d\u539f\u59cb\u6578\u64da\u9032\u884c\u96a8\u6a5f\u62bd\u6a23\uff0c\u4ee5\u4fdd\u8b49\u96a8\u6a5f\u6027\u3002\u82e5\u60f3\u8981\u6bcf\u6b21\u7a0b\u5f0f\u57f7\u884c\u6642\u5207\u5272\u7d50\u679c\u90fd\u662f\u4e00\u6a23\u7684\u53ef\u4ee5\u8a2d\u5b9a\u4e82\u6578\u96a8\u6a5f\u7a2e\u5b50 random_state \u4e26\u7d66\u4e88\u4e00\u500b\u96a8\u6a5f\u6578\u503c\u3002\u6700\u5f8c\u4e00\u500b\u662f stratify \u5206\u5c64\u96a8\u6a5f\u62bd\u6a23\uff0c\u7279\u5225\u662f\u5728\u539f\u59cb\u6578\u64da\u4e2d\u6a23\u672c\u6a19\u7c64\u5206\u4f48\u4e0d\u5747\u8861\u6642\u975e\u5e38\u6709\u7528\u3002\u4f7f\u7528\u6642\u6a5f\u662f\u78ba\u4fdd\u5206\u985e\u554f\u984c y \u7684\u985e\u5225\u6578\u91cf\u5206\u4f48\u8981\u8207\u539f\u8cc7\u6599\u96c6\u4e00\u81f4\u3002\u4ee5\u514d\u8cc7\u6599\u96c6\u5207\u5272\u4e0d\u5e73\u5747\u5c0e\u81f4\u6a21\u578b\u8a13\u7df4\u6642\u6709\u5f88\u5927\u7684\u504f\u5dee\u3002 from sklearn.model_selection import train_test_split X_train , X_test , y_train , y_test = train_test_split ( X , y , test_size = 0.3 , random_state = 42 , stratify = y ) print ( 'train shape:' , X_train . shape ) print ( 'test shape:' , X_test . 
shape ) \u8f38\u51fa\u7d50\u679c\uff1a train shape: (105, 4) test shape: (45, 4)","title":"3) \u5207\u5272\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#standardization","text":"\u5c07\u6240\u6709\u7279\u5fb5\u6a19\u6e96\u5316\uff0c\u4e5f\u5c31\u662f\u9ad8\u65af\u5206\u4f48\u3002\u4f7f\u5f97\u6578\u64da\u7684\u5e73\u5747\u503c\u70ba 0\uff0c\u65b9\u5dee\u70ba 1\u3002\u9069\u5408\u7684\u4f7f\u7528\u6642\u6a5f\u65bc\u7576\u6709\u4e9b\u7279\u5fb5\u7684\u65b9\u5dee\u904e\u5927\u6642\uff0c\u4f7f\u7528\u6a19\u6e96\u5316\u80fd\u5920\u6709\u6548\u5730\u8b93\u6a21\u578b\u5feb\u901f\u6536\u6582\u3002 from sklearn.preprocessing import StandardScaler scaler = StandardScaler () X_train_scaled = scaler . fit_transform ( X_train ) # scaled\u4e4b\u5f8c\u7684\u8cc7\u6599\u96f6\u5747\u503c\uff0c\u55ae\u4f4d\u65b9\u5dee print ( '\u8cc7\u6599\u96c6 X \u7684\u5e73\u5747\u503c : ' , X_train . mean ( axis = 0 )) print ( '\u8cc7\u6599\u96c6 X \u7684\u6a19\u6e96\u5dee : ' , X_train . std ( axis = 0 )) print ( ' \\n StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u5e73\u5747\u503c : ' , X_train_scaled . mean ( axis = 0 )) print ( 'StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6a19\u6e96\u5dee : ' , X_train_scaled . std ( axis = 0 )) \u8f38\u51fa\u7d50\u679c\uff1a \u8cc7\u6599\u96c6 X \u7684\u5e73\u5747\u503c : [5.87333333 3.0552381 3.7847619 1.20571429] \u8cc7\u6599\u96c6 X \u7684\u6a19\u6e96\u5dee : [0.85882164 0.45502087 1.77553646 0.77383751] StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u5e73\u5747\u503c : [ 0. -0. -0. -0.] StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6 X \u7684\u6a19\u6e96\u5dee : [1. 1. 1. 1.] \u8a13\u7df4\u96c6\u7684 Scaler \u64ec\u5408\u5b8c\u6210\u5f8c\uff0c\u6211\u5011\u5c31\u80fd\u505a\u76f8\u540c\u7684\u8f49\u63db\u5728\u6e2c\u8a66\u96c6\u4e0a\u3002 X_test_scaled = scaler . 
transform ( X_test ) print ( ' \\n StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u5e73\u5747\u503c : ' , X_test_scaled . mean ( axis = 0 )) print ( 'StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6a19\u6e96\u5dee : ' , X_test_scaled . std ( axis = 0 )) \u8f38\u51fa\u7d50\u679c\uff1a StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u5e73\u5747\u503c : [0.40925926 0.44259259 0.44750958 0.45185185] StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6a19\u6e96\u5dee : [0.20457725 0.15915694 0.29647499 0.30224923] \u5982\u679c\u60f3\u5c07\u8f49\u63db\u5f8c\u7684\u8cc7\u6599\u9084\u539f\u53ef\u4ee5\u4f7f\u7528 inverse_transform() \u5c07\u6578\u503c\u9084\u539f\u6210\u539f\u672c\u7684\u8f38\u5165\u3002 # \u5c07\u7e2e\u653e\u7684\u8cc7\u6599\u9084\u539f X_test_inverse = scaler . inverse_transform ( X_test_scaled )","title":"Standardization\u5e73\u5747&\u8b8a\u7570\u6578\u6a19\u6e96\u5316"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#minmaxscaler","text":"\u5728 MinMaxScaler \u4e2d\u662f\u7d66\u5b9a\u4e86\u4e00\u500b\u660e\u78ba\u7684\u6700\u5927\u503c\u8207\u6700\u5c0f\u503c\u3002\u6bcf\u500b\u7279\u5fb5\u4e2d\u7684\u6700\u5c0f\u503c\u8b8a\u6210\u4e860\uff0c\u6700\u5927\u503c\u8b8a\u6210\u4e861\u3002\u6578\u64da\u6703\u7e2e\u653e\u5230\u5230[0,1]\u4e4b\u9593\u3002 from sklearn.preprocessing import MinMaxScaler scaler = MinMaxScaler () X_train_scaled = scaler . fit_transform ( X_train ) # scaled \u4e4b\u5f8c\u7684\u8cc7\u6599\u6700\u5c0f\u503c\u3001\u6700\u5927\u503c print ( '\u8cc7\u6599\u96c6 X \u7684\u6700\u5c0f\u503c : ' , X_train . min ( axis = 0 )) print ( '\u8cc7\u6599\u96c6 X \u7684\u6700\u5927\u503c : ' , X_train . max ( axis = 0 )) print ( ' \\n StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6700\u5c0f\u503c : ' , X_train_scaled . 
min ( axis = 0 )) print ( 'StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6700\u5927\u503c : ' , X_train_scaled . max ( axis = 0 )) \u8f38\u51fa\u7d50\u679c\uff1a \u8cc7\u6599\u96c6 X \u7684\u6700\u5c0f\u503c : [4.3 2. 1.1 0.1] \u8cc7\u6599\u96c6 X \u7684\u6700\u5927\u503c : [7.9 4.4 6.9 2.5] StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6700\u5c0f\u503c : [0. 0. 0. 0.] StandardScaler \u7e2e\u653e\u904e\u5f8c\u8a13\u7df4\u96c6\u7684\u6700\u5927\u503c : [1. 1. 1. 1.] X_test_scaled = scaler . transform ( X_test ) print ( ' \\n StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6700\u5c0f\u503c : ' , X_test_scaled . min ( axis = 0 )) print ( 'StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6700\u5927\u503c : ' , X_test_scaled . max ( axis = 0 )) StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6700\u5c0f\u503c : [ 0.02777778 0.125 -0.01724138 0.04166667] StandardScaler \u7e2e\u653e\u904e\u5f8c\u6e2c\u8a66\u96c6\u7684\u6700\u5927\u503c : [0.83333333 0.83333333 0.89655172 0.95833333]","title":"MinMaxScaler\u6700\u5c0f\u6700\u5927\u503c\u6a19\u6e96\u5316"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#maxabsscaler","text":"MaxAbsScaler \u8207 MinMaxScaler \u985e\u4f3c\uff0c\u6240\u6709\u6578\u64da\u90fd\u6703\u9664\u4ee5\u8a72\u5217\u7d55\u5c0d\u503c\u5f8c\u7684\u6700\u5927\u503c\u3002 \u6578\u64da\u6703\u7e2e\u653e\u5230\u5230[-1,1]\u4e4b\u9593\u3002 from sklearn.preprocessing import MaxAbsScaler scaler = MaxAbsScaler () . fit ( X ) X_scaled = scaler . transform ( X ) X_test_scaled = scaler . 
transform ( X_test )","title":"MaxAbsScaler\u7d55\u5c0d\u503c\u6700\u5927\u6a19\u6e96\u5316"},{"location":"4.\u54b1\u5011\u4e00\u8d77\u505a\u8cc7\u6599\u6e05\u7406\u548c\u524d\u8655\u7406/#robustscaler","text":"\u53ef\u4ee5\u6709\u6548\u7684\u7e2e\u653e\u5e36\u6709 outlier \u7684\u6578\u64da\uff0c\u900f\u904e Robust \u5982\u679c\u6578\u64da\u4e2d\u542b\u6709\u7570\u5e38\u503c\u5728\u7e2e\u653e\u4e2d\u6703\u6368\u53bb\u3002 from sklearn.preprocessing import RobustScaler scaler = RobustScaler () . fit ( X ) X_scaled = scaler . transform ( X ) X_test_scaled = scaler.transform(X_test) \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"RobustScaler"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/","text":"[Day 5] \u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16 \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u4e86\u89e3\u6a5f\u5668\u5b78\u7fd2\u662f\u4ec0\u9ebc \u4f55\u8b02\u6a5f\u5668\u5b78\u7fd2? \u4eba\u5de5\u667a\u6167\u7684\u7bc4\u7587 \u4ec0\u9ebc\u662f\u4eba\u5de5\u667a\u6167? \u8cc7\u6599\u79d1\u5b78\u4e09\u528d\u5ba2 \u6a5f\u5668\u5b78\u7fd2\u7684\u7a2e\u985e\u6709\u54ea\u4e9b\uff1f \u5f9e\u4eba\u985e\u5b78\u7fd2\u5230\u6a5f\u5668\u5b78\u7fd2 \u8a8d\u8b58\u4ec0\u9ebc\u662f\u8cc7\u6599 \u6a5f\u5668\u5b78\u7fd2\u7684\u6d41\u7a0b \u4f55\u8b02\u6a5f\u5668\u5b78\u7fd2? 
\u6a5f\u5668\u5b78\u7fd2\u662f\u4e00\u7a2e\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\uff0c\u662f\u4e00\u7a2e\u5f9e\u4e00\u5927\u7fa4\u8cc7\u6599\u4e2d\u53bb\u5b78\u7fd2\u627e\u51fa\u89e3\u6c7a\u554f\u984c\u7684\u65b9\u6cd5\u3002\u7c21\u55ae\u4f86\u8aaa\u4f60\u53ea\u8981\u5c07\u5927\u91cf\u7684\u8cc7\u6599\u9935\u7d66\u96fb\u8166\uff0c\u6a5f\u5668\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\u6703\u70ba\u4f60\u91cf\u8eab\u6253\u9020\u5b78\u7fd2\u51fa\u4e00\u500b\u7279\u5b9a\u7684\u6a21\u578b\u7d66\u4f60\uff0c\u800c\u4e0d\u662f\u518d\u900f\u904e\u4eba\u985e\u624b\u52d5\u7684\u7d66\u4e88\u898f\u5247\u3002\u900f\u904e\u4e00\u5806\u8cc7\u6599\u6709\u6a19\u7c64\u7d66\u7b54\u6848\uff0c\u4e26\u5f9e\u8cc7\u6599\u96c6\u5b78\u7fd2\u8207\u6a19\u8a18\u9593\u7684\u95dc\u806f\uff0c\u6700\u5f8c\u518d\u5f9e\u975e\u7279\u5b9a\u8cc7\u6599\u53bb\u8fa8\u8a8d\u7b54\u6848\u3002 \u4eba\u5de5\u667a\u6167\u7684\u7bc4\u7587 \u5176\u5be6\u4eba\u5de5\u667a\u6167\u7684\u61c9\u7528\u5728\u73fe\u5be6\u751f\u6d3b\u4e2d\u96a8\u8655\u53ef\u898b\uff0c\u5f9e\u88fd\u9020\u3001\u91ab\u7642\u3001\u91d1\u878d\u3001\u4ea4\u901a\u3001\u5b89\u9632\u3001 \u96f6\u552e\u3001\u7269\u6d41\u3001\u8fb2\u696d......\u7b49\u90fd\u53ef\u4ee5\u770b\u5230\u8207 AI \u7684\u76f8\u95dc\u61c9\u7528\u3002\u7576\u7136\u4eba\u5de5\u667a\u6167\u7684\u51fa\u73fe\u4e26\u4e0d\u662f\u66c7\u82b1\u4e00\u73fe\uff0cArtificial Intelligence \u9019\u4e00\u8a5e\u5176\u5be6\u65e9\u5728 20 \u4e16\u7d00\u4e2d\u5c31\u88ab\u63d0\u51fa\uff0c\u8d77\u521d\u7576\u7136\u4e0d\u88ab\u770b\u597d\u751a\u81f3\u5927\u5bb6\u90fd\u89ba\u5f97\u8981\u4e00\u500b\u6a5f\u5668\u4eba\u5b78\u6703\u4eba\u985e\u7684\u667a\u6167\u662f\u5929\u65b9\u591c\u8b5a\u7684\u4e8b\u60c5\uff0c\u4e2d\u9593\u4e5f\u7d93\u6b77\u597d\u5e7e\u6b21 AI \u5bd2\u51ac\uff0c\u73fe\u5728\u56de\u904e\u982d\u4f86\u770b AI 
\u7684\u7814\u7a76\u9818\u57df\u8d77\u4f0f\u4f0f\u3002\u4e0d\u904e\u96a8\u8457\u8edf\u786c\u9ad4\u7684\u9032\u6b65\uff0c\u9010\u6f38\u4f7f\u5f97\u9700\u8981\u5927\u91cf\u8a08\u7b97\u7684\u4eba\u5de5\u667a\u6167\u6280\u8853\u6162\u6162\u7684\u88ab\u6316\u6398\u51fa\u4f86\u3002\u8fd1\u5e74\u4f86 AI \u65b0\u5275\u5982\u8207\u6625\u7b4d\u822c\u5192\u51fa\uff0c\u667a\u6167\u6a5f\u5668\u4eba\u3001\u611f\u77e5\u8b58\u5225\u3001\u81ea\u7136\u8a9e\u8a00\u8655\u7406\u3001\u5c0d\u8a71\u5ba2\u670d\u3001\u81ea\u52d5\u99d5\u99db\u3001\u7455\u75b5\u6aa2\u6e2c\u3001\u9810\u9632\u6027\u7dad\u4fee\u3001\u81ea\u52d5\u6d41\u7a0b\u63a7\u5236\u3001\u539f\u6599\u7d44\u5408\u6700\u4f73\u5316......\u7b49\u3002 \u4ec0\u9ebc\u662f\u4eba\u5de5\u667a\u6167? \u5176\u5be6\u4eba\u5de5\u667a\u6167\u9019\u9805\u9818\u57df\u53c8\u5206\u6210\u5f88\u591a\u9580\u6d3e\uff0c\u5f9e\u6700\u65e9\u7684\u7b26\u865f\u908f\u8f2f\u3001\u5c08\u5bb6\u7cfb\u7d71\u958b\u59cb\u8aaa\u8d77\u3002\u65e9\u671f\u7684 AI \u662f\u5c07\u4eba\u985e\u7684\u5c08\u5bb6\u77e5\u8b58\u900f\u904e\u77e5\u8b58\u5eab\u8207\u898f\u5247\u5eab\u653e\u5230\u6a5f\u5668\u4eba\u7684\u5927\u8166\u4e2d\uff0c\u4e26\u8ce6\u4e88\u6a5f\u5668\u4eba\u667a\u6167\u4f7f\u5f97\u6709\u80fd\u529b\u5224\u65b7\u4e8b\u7269\u3002\u7576\u7136\u4eba\u985e\u5c08\u5bb6\u7684\u77e5\u8b58\u59cb\u7d42\u6709\u9650\uff0c\u96a8\u8457\u7db2\u8def\u8207\u500b\u4eba\u96fb\u8166\u666e\u53ca\u4e26\u9032\u5165\u4e86\u5927\u6578\u64da\u6642\u4ee3\u3002\u5404\u500b\u79d1\u5b78\u5bb6\u65bc\u662f\u958b\u59cb\u601d\u8003\u5982\u4f55\u5c07\u9019\u4e9b\u641c\u96c6\u4f86\u7684\u5927\u91cf\u6578\u64da\u9032\u884c\u61c9\u7528\u8207\u5206\u6790\uff1f\u6a5f\u5668\u5b78\u7fd2\u4e00\u8a5e\u5c31\u51fa\u73fe\u4e86\uff0c\u76ee\u6a19\u662f\u900f\u904e\u73fe\u5be6\u751f\u6d3b\u4e2d\u6240\u6536\u96c6\u7684\u8cc7\u6599\uff0c\u642d\u914d\u5404\u7a2e\u4e0d\u540c\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u8a13\u7df4\u51fa\u4f86\u4e00\u500b\u6a21\u578b\uff0c\u4f7f\u5f97\u6a5f\u5668\u4eba\u6709\u5224\u6
5b7\u8207\u9810\u6e2c\u7684\u80fd\u529b\u3002\u7576\u7136\u8fd1\u5e7e\u5e74\u71b1\u9580\u7684\u6df1\u5ea6\u5b78\u7fd2\u5176\u5be6\u50c5\u662f\u500b\u6a5f\u5668\u5b78\u7fd2\u88e1\u9762\u7684\u5176\u4e2d\u4e00\u7a2e\u5b78\u7fd2\u7684\u65b9\u6cd5\uff0c\u4ed6\u662f\u6a21\u4eff\u4eba\u985e\u7684\u795e\u7d93\u7cfb\u7d71\uff0c\u900f\u904e\u5927\u91cf\u7684\u795e\u7d93\u5143\u8207\u591a\u5c64\u7684\u795e\u7d93\u7db2\u8def\u5efa\u69cb\u51fa\u4f86\u7684\u8907\u96dc\u6578\u5b78\u6a21\u578b\u3002\u7136\u800c\u5728\u672c\u7cfb\u5217\u6559\u5b78\u4e2d\u6211\u5011\u6703\u5f9e\u6700\u57fa\u790e\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u958b\u59cb\u63d0\u8d77\uff0c\u4e26\u4e00\u6b65\u4e00\u6b65\u7684\u5e36\u9818\u8b80\u8005\u6210\u70ba\u4e00\u4f4d\u771f\u6b63\u7684\u8cc7\u6599\u79d1\u5b78\u5bb6\u3002 \u8cc7\u6599\u79d1\u5b78 \u2715 \u4e09\u528d\u5ba2 \u8cc7\u6599\u79d1\u5b78\u4e3b\u8981\u900f\u904e\u6a5f\u5668\u5b78\u7fd2\u7684\u6280\u8853\uff0c\u8b93\u6a5f\u5668\u53ef\u4ee5\u9810\u6e2c\u6216\u8005\u63a8\u8ad6\u3002\u5176\u4e2d\u9019\u5e7e\u5e74\u5f88\u592f\u7684\u8cc7\u6599\u79d1\u5b78\u5bb6\u9019\u4e00\u540d\u8a5e\u5176\u5be6\u662f\u7531\u4e09\u7a2e\u4eba\u6240\u7d44\u5408\u8d77\u4f86\u7684\u3002\u7b2c\u4e00\u500b\u662f\u6578\u5b78\u8207\u7d71\u8a08\u80cc\u666f\u7684\u4eba\uff0c\u4ed6\u5011\u80fd\u5920\u900f\u904e\u5c0d\u8cc7\u6599\u7684\u654f\u611f\u5ea6\u5f9e\u4e00\u5927\u7fa4\u539f\u59cb\u8cc7\u6599\u4e2d\u63a2\u7d22\u6709\u610f\u7fa9\u7684\u8cc7\u8a0a\u3002\u4e26\u8a2d\u8a08\u4e00\u5957\u9069\u5408\u7684\u6a21\u578b\u70ba\u9019\u4e00\u7fa4\u8cc7\u6599\u9032\u884c\u6578\u64da\u64ec\u5408\u3002\u7b2c\u4e8c\u7a2e\u4eba\u662f\u96fb\u8166\u79d1\u5b78\u80cc\u666f\u7684\u5de5\u7a0b\u5e2b\uff0c\u4ed6\u5011\u64c5\u9577\u7a0b\u5f0f\u8a9e\u8a00\u80fd\u5920\u5c07\u8907\u96dc\u7684\u6578\u5b78\u6a21\u578b\u5920\u904e\u7a0b\u5f0f\u5be6\u4f5c\u4e26\u4e14\u5354\u52a9\u843d\u5730\u6574\u5408\u3002\u7576\u7136\u73fe\u4eca\u6709\u975e\u5e38\u591a\u6a5f\u5668\u5b78\u7fd2\u7684\u5957\u4ef6\
u4f8b\u5982 Sklearn\u3001TensorFlow......\u7b49\uff0c\u964d\u4f4e\u4e86\u5927\u5bb6\u5b78\u7fd2\u7684\u9580\u6abb\uff0c\u4e0d\u4e00\u5b9a\u662f\u8981\u7406\u5de5\u80cc\u666f\u7684\u4eba\u90fd\u53ef\u4ee5\u900f\u904e\u9019\u4e9b\u6a5f\u5668\u5b78\u7fd2\u5957\u4ef6\u4e00\u7aba\u4eba\u5de5\u667a\u6167\u7684\u5967\u79d8\u3002\u9664\u6b64\u4e4b\u5916 MLOps \u662f\u8fd1\u5e74\u4f86\u5ef6\u4f38\u51fa\u4f86\u7684\u65b0\u540d\u8a5e\uff0c\u5176\u5be6\u6982\u5ff5\u8207 DevOps \u985e\u4f3c\u4e26\u5c07\u9019\u4e00\u5957\u6a5f\u5236\u8907\u88fd\u5728\u6a5f\u5668\u5b78\u7fd2\u5c08\u6848\u4e0a\uff0c\u6211\u5011\u5e73\u6642\u6240\u57f7\u884c\u7684 AI \u5c08\u6848\u5fc5\u9808\u900f\u904e\u6301\u7e8c\u6027\u6574\u5408\u8207\u7dad\u904b\u7684\u89c0\u5ff5\u4e0d\u65b7\u7684\u9031\u671f\u6027\u66f4\u65b0\u5f9e\u6700\u65b0\u6536\u96c6\u5230\u5f97\u6578\u64da\u91cd\u65b0\u5b78\u7fd2\u6a21\u578b\u8d8a\u4f86\u8d8a\u8cbc\u8fd1\u4f7f\u7528\u8005\u3002\u6700\u5f8c\u4e00\u500b\u95dc\u9375\u7684\u4eba\u7269\u5c31\u662f\u5404\u884c\u5404\u696d\u7684\u9818\u57df\u5c08\u5bb6\uff0c\u56e0\u70ba AI \u518d\u4e5f\u4e0d\u662f\u8cc7\u8a0a\u80cc\u666f\u4eba\u7684\u5c08\u5229\u3002\u6211\u5011\u53ef\u4ee5\u5920\u904e AI \u89e3\u6c7a\u65e5\u5e38\u751f\u6d3b\u4e2d\u7684\u554f\u984c\uff0c\u56e0\u6b64\u6211\u5011\u5fc5\u9808\u8207\u9818\u57df\u5c08\u5bb6\u9032\u884c\u5408\u4f5c\u5354\u52a9\u8cc7\u6599\u6e05\u8207\u8207\u5efa\u7acb\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u3002\u7e3d\u4e4b\u8981\u6210\u70ba\u4e00\u500b\u597d\u7684\u8cc7\u79d1\u5b78\u5bb6\u4e0a\u8ff0\u4e09\u7a2e\u4eba\u7684\u7279\u6027\u7f3a\u4e00\u4e0d\u53ef\u3002 \u6a5f\u5668\u5b78\u7fd2\u7a2e\u985e \u6a5f\u5668\u5b78\u7fd2\u662f\u4e00\u7a2e\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\uff0c\u662f\u4e00\u7a2e\u5f9e\u8cc7\u6599\u4e2d\u53bb\u5b78\u7fd2\u4e26\u627e\u51fa\u89e3\u6c7a\u65b9\u6cd5\u3002\u5176\u4f9d\u7167\u6a5f\u5668\u5b78\u7fd2\u7684\u7a2e\u985e\u5927\u81f4\u53ef\u4ee5\u5206\u6210\u4ee5\u4e0b\u5e7e\u985e\uff1a \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2 
\u7121\u9700\u6a19\u7c64/\u7b54\u6848\u5373\u53ef\u5b78\u7fd2 Ex: \u96c6\u7fa4 (Clustering) \u76e3\u7763\u5f0f\u5b78\u7fd2 \u9700\u8981\u6a19\u7c64/\u7b54\u6848\u624d\u80fd\u5b78\u7fd2 Ex: \u5206\u985e (Classification) \u3001 \u56de\u6b78 (Regression) \u534a\u76e3\u7763\u5f0f\u5b78\u7fd2 \u81ea\u76e3\u7763\u5b78\u7fd2 \u5f37\u5316\u5b78\u7fd2 \u5982\u4f55\u64f7\u53d6\u597d\u7684\u7279\u5fb5\u662f\u5728\u6a5f\u5668\u5b78\u7fd2\u4e2d\u5f88\u91cd\u8981\u7684\u4e00\u4ef6\u4e8b \u5f9e\u4eba\u985e\u5b78\u7fd2\u5230\u6a5f\u5668\u5b78\u7fd2 \u7c21\u55ae\u4f86\u8aaa\u6a5f\u5668\u5b78\u7fd2\u5c31\u662f\u8981\u5f9e\u4e00\u5927\u7fa4\u8cc7\u6599\u7576\u4e2d\u627e\u51fa\u4e00\u500b\u6578\u5b78\u6a21\u578b\u3002\u9019\u500b\u6578\u5b78\u6a21\u578b\u53ef\u4ee5\u7a31\u4f5c\u662f\u4e00\u500b f(x)=y \u5176\u4e2d x \u70ba\u8f38\u5165\u7684\u8cc7\u6599\uff0cy \u70ba\u8a72\u7b46\u8cc7\u6599\u6240\u76f8\u5c0d\u61c9\u7684\u8f38\u51fa\u3002\u5176\u4e2d f \u5373\u70ba\u51fd\u6578\uff0c\u4e5f\u5c31\u662f\u4efb\u4e00\u7a2e\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u3002\u81f3\u65bc\u5178\u578b\u7684\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u6709\u54ea\u4e9b\u5462\uff1f\u4f8b\u5982\u7dda\u6027\u8ff4\u6b78\u3001\u908f\u8f2f\u56de\u6b78\u3001KNN\u3001SVM\u3001\u6c7a\u7b56\u6a39\u3001\u96a8\u6a5f\u68ee\u6797\u3001XGBoost......\u7b49\u3002\u4e4b\u5f8c\u7684\u7cfb\u5217\u6587\u7ae0\u90fd\u6703\u4f9d\u5e8f\u5411\u5404\u4f4d\u89e3\u91cb\u3002 \u4ec0\u9ebc\u662f\u8cc7\u6599? 
\u4e00\u822c\u4f86\u8aaa\u8cc7\u6599\u53ef\u4ee5\u5206\u6210\u5169\u500b\u90e8\u5206\u3002\u4ee5\u4e00\u500b\u5206\u985e\u7684\u554f\u984c\u4f86\u8aaa\uff0c\u5206\u5225\u6709\u8f38\u5165\u7684\u7279\u5fb5\u4ee5\u53ca\u8a72\u7b46\u8cc7\u6599\u76f8\u5c0d\u61c9\u7684\u7b54\u6848\u7a31\u4f5c\u6a19\u8a18\u3002AI \u9019\u500b\u9818\u57df\u5c31\u662f\u8b93\u6a5f\u5668\u6709\u5b78\u7fd2\u89e3\u6c7a\u554f\u984c\u7684\u80fd\u529b\uff0c\u800c\u4e0d\u662f\u6211\u5011\u544a\u8a34\u4ed6\u61c9\u8a72\u600e\u9ebc\u89e3\u6c7a\u554f\u984c\u3002\u6211\u5011\u8209\u4e00\u500b\u7c21\u55ae\u7684\u4f8b\u5b50\uff0c\u5047\u8a2d\u6211\u5011\u9700\u8981\u9810\u6e2c\u660e\u5929\u662f\u5426\u6703\u4e0b\u96e8\u3002\u6211\u5011\u7684\u8f38\u5165\u7279\u5fb5\u5c31\u53ef\u4ee5\u6709\u5404\u500b\u89c0\u6e2c\u7ad9\u7684\u96f2\u91cf\u8207\u6eab\u6fd5\u5ea6\u4f5c\u70ba\u6a21\u578b\u8a13\u7df4\u7684\u8cc7\u6599\u3002\u800c\u6bcf\u4e00\u7b46\u7684\u5929\u6c23\u8cc7\u8a0a\u90fd\u5c0d\u61c9\u8457\u662f\u5426\u6703\u4e0b\u96e8\u7684\u6a19\u6e96\u7b54\u6848\u3002 \u7279\u5fb5 (Feature): \u7528\u4f86\u63cf\u8ff0\u6bcf\u4e00\u7b46\u8cc7\u6599\uff0c\u901a\u5e38\u6703\u7528 X \u4f86\u8868\u793a \u6a19\u8a18 (Label): \u7528\u4f86\u8868\u793a\u6bcf\u4e00\u7b46\u8cc7\u6599\u6240\u5c0d\u61c9\u7684\u8f38\u51fa\uff0c\u9019\u500b\u8f38\u51fa\u6a23\u5f0f\u53ef\u4ee5\u6709\u4e0d\u540c\u7684\u72c0\u614b(\u53ef\u80fd\u662f\u985e\u5225\u6216\u8005\u5be6\u6578\u503c\u7b49)\uff0c\u901a\u5e38\u6703\u7528 Y \u4f86\u8868\u793a\u3002 \u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b 
\u5b8c\u6574\u7684\u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b\u5927\u81f4\u5206\u6210\u516b\u500b\u6b65\u9a5f\u3002\u9996\u5148\u5b9a\u7fa9\u554f\u984c\uff0c\u7d93\u904e\u9700\u6c42\u8a0e\u8ad6\u8207\u8a55\u4f30\u5f8c\u6709\u500b\u660e\u78ba\u7684\u76ee\u6a19\u4e26\u958b\u59cb\u57f7\u884c\u5c08\u6848\u3002\u63a5\u8457\u958b\u59cb\u641c\u96c6\u8cc7\u6599\uff0c\u7531\u65bc\u5404\u5834\u57df\u6240\u6536\u96c6\u5230\u7684\u539f\u59cb\u6578\u64da\u53ef\u80fd\u5c1a\u672a\u6574\u7406\u4ee5\u53ca\u683c\u5f0f\u5c1a\u672a\u7d71\u4e00\u3002\u56e0\u6b64\u7b2c\u4e09\u6b65\u7684\u8cc7\u6599\u6e05\u7406\u6975\u70ba\u91cd\u8981\uff0c\u6709\u500b\u4e7e\u6de8\u7684\u8cc7\u6599\u53ef\u4ee5\u5c0d\u6a21\u578b\u8868\u73fe\u6709\u5927\u5e45\u7684\u63d0\u5347\u3002\u8cc7\u6599\u4e00\u5207\u5c31\u7dd2\u5f8c\u5efa\u8b70\u5728\u5efa\u6a21\u4e4b\u524d\u5148\u5c0d\u8cc7\u6599\u9032\u884c\u8996\u89ba\u5316\u5206\u6790\uff0c\u4e26\u70ba\u6578\u64da\u505a\u524d\u8655\u7406\u4ee5\u53ca\u5c08\u696d\u77e5\u8b58\u7684\u7279\u5fb5\u5de5\u7a0b\u3002\u5c0d\u8cc7\u6599\u6709\u521d\u6b65\u7684\u8a8d\u8b58\u5f8c\uff0c\u63a5\u8457\u6311\u9078\u5408\u9069\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u8a13\u7df4\u8207\u8a55\u4f30\u6a21\u578b\u3002\u5728\u6a21\u578b\u6b63\u5f0f\u4e0a\u7dda\u4e4b\u524d\uff0c\u5148\u900f\u904e\u6e2c\u8a66\u96c6\u6216\u662f\u4ea4\u53c9\u9a57\u8b49\u7b49\u6a5f\u5236\u78ba\u8a8d\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u3002\u6a21\u578b\u78ba\u8a8d\u6c92\u6709\u554f\u984c\u5f8c\u5373\u53ef\u5c07\u6a21\u578b\u6253\u5305\u8f38\u51fa\uff0c\u4e26\u4e14\u8207\u5be6\u969b\u5834\u57df\u61c9\u7528\u9032\u884c\u6574\u5408\u3002\u6700\u7d42\u5c31\u662f\u90e8\u7f72\u6a21\u578b\u4ee5\u53ca\u7dad\u904b\uff0c\u6301\u7e8c\u5c07\u5834\u57df\u8490\u96c6\u5230\u7684\u65b0\u8cc7\u6599\u9032\u884c\u518d\u8a13\u7df4\uff0c\u5f62\u6210\u4e00\u500b\u958b\u767c\u5faa\u74b0\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub 
\u53d6\u5f97\uff01","title":"[Day 5] \u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#day-5","text":"","title":"[Day 5] \u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#_1","text":"\u4e86\u89e3\u6a5f\u5668\u5b78\u7fd2\u662f\u4ec0\u9ebc \u4f55\u8b02\u6a5f\u5668\u5b78\u7fd2? \u4eba\u5de5\u667a\u6167\u7684\u7bc4\u7587 \u4ec0\u9ebc\u662f\u4eba\u5de5\u667a\u6167? \u8cc7\u6599\u79d1\u5b78\u4e09\u528d\u5ba2 \u6a5f\u5668\u5b78\u7fd2\u7684\u7a2e\u985e\u6709\u54ea\u4e9b\uff1f \u5f9e\u4eba\u985e\u5b78\u7fd2\u5230\u6a5f\u5668\u5b78\u7fd2 \u8a8d\u8b58\u4ec0\u9ebc\u662f\u8cc7\u6599 \u6a5f\u5668\u5b78\u7fd2\u7684\u6d41\u7a0b","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#_2","text":"\u6a5f\u5668\u5b78\u7fd2\u662f\u4e00\u7a2e\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\uff0c\u662f\u4e00\u7a2e\u5f9e\u4e00\u5927\u7fa4\u8cc7\u6599\u4e2d\u53bb\u5b78\u7fd2\u627e\u51fa\u89e3\u6c7a\u554f\u984c\u7684\u65b9\u6cd5\u3002\u7c21\u55ae\u4f86\u8aaa\u4f60\u53ea\u8981\u5c07\u5927\u91cf\u7684\u8cc7\u6599\u9935\u7d66\u96fb\u8166\uff0c\u6a5f\u5668\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\u6703\u70ba\u4f60\u91cf\u8eab\u6253\u9020\u5b78\u7fd2\u51fa\u4e00\u500b\u7279\u5b9a\u7684\u6a21\u578b\u7d66\u4f60\uff0c\u800c\u4e0d\u662f\u518d\u900f\u904e\u4eba\u985e\u624b\u52d5\u7684\u7d66\u4e88\u898f\u5247\u3002\u900f\u904e\u4e00\u5806\u8cc7\u6599\u6709\u6a19\u7c64\u7d66\u7b54\u6848\uff0c\u4e26\u5f9e\u8cc7\u6599\u96c6\u5b78\u7fd2\u8207\u6a19\u8a18\u9593\u7684\u95dc\u806f\uff0c\u6700\u5f8c\u518d\u5f9e\u975e\u7279\u5b9a\u8cc7\u6599\u53bb\u8fa8\u8a8d\u7b54\u6848\u3002","title":"\u4f55\u8b02\u6a5f\u5668\u5b78\u7fd2?"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#_3","text":"\u5176\u5be6\u4eba\u5de5\u667a\u6167\u7684\u61c9\u7528\u5728\u73fe\u5be6\u751f\u6d3b\u4e2d\u96a8\u8655\u53ef\u898b\uff0c\u5f9e\u88fd\u9020\u3001\u91ab\u7642\u3001\u91d1\u878d\u30
01\u4ea4\u901a\u3001\u5b89\u9632\u3001 \u96f6\u552e\u3001\u7269\u6d41\u3001\u8fb2\u696d......\u7b49\u90fd\u53ef\u4ee5\u770b\u5230\u8207 AI \u7684\u76f8\u95dc\u61c9\u7528\u3002\u7576\u7136\u4eba\u5de5\u667a\u6167\u7684\u51fa\u73fe\u4e26\u4e0d\u662f\u66c7\u82b1\u4e00\u73fe\uff0cArtificial Intelligence \u9019\u4e00\u8a5e\u5176\u5be6\u65e9\u5728 20 \u4e16\u7d00\u4e2d\u5c31\u88ab\u63d0\u51fa\uff0c\u8d77\u521d\u7576\u7136\u4e0d\u88ab\u770b\u597d\u751a\u81f3\u5927\u5bb6\u90fd\u89ba\u5f97\u8981\u4e00\u500b\u6a5f\u5668\u4eba\u5b78\u6703\u4eba\u985e\u7684\u667a\u6167\u662f\u5929\u65b9\u591c\u8b5a\u7684\u4e8b\u60c5\uff0c\u4e2d\u9593\u4e5f\u7d93\u6b77\u597d\u5e7e\u6b21 AI \u5bd2\u51ac\uff0c\u73fe\u5728\u56de\u904e\u982d\u4f86\u770b AI \u7684\u7814\u7a76\u9818\u57df\u8d77\u4f0f\u4f0f\u3002\u4e0d\u904e\u96a8\u8457\u8edf\u786c\u9ad4\u7684\u9032\u6b65\uff0c\u9010\u6f38\u4f7f\u5f97\u9700\u8981\u5927\u91cf\u8a08\u7b97\u7684\u4eba\u5de5\u667a\u6167\u6280\u8853\u6162\u6162\u7684\u88ab\u6316\u6398\u51fa\u4f86\u3002\u8fd1\u5e74\u4f86 AI \u65b0\u5275\u5982\u8207\u6625\u7b4d\u822c\u5192\u51fa\uff0c\u667a\u6167\u6a5f\u5668\u4eba\u3001\u611f\u77e5\u8b58\u5225\u3001\u81ea\u7136\u8a9e\u8a00\u8655\u7406\u3001\u5c0d\u8a71\u5ba2\u670d\u3001\u81ea\u52d5\u99d5\u99db\u3001\u7455\u75b5\u6aa2\u6e2c\u3001\u9810\u9632\u6027\u7dad\u4fee\u3001\u81ea\u52d5\u6d41\u7a0b\u63a7\u5236\u3001\u539f\u6599\u7d44\u5408\u6700\u4f73\u5316......\u7b49\u3002","title":"\u4eba\u5de5\u667a\u6167\u7684\u7bc4\u7587"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#_4","text":"\u5176\u5be6\u4eba\u5de5\u667a\u6167\u9019\u9805\u9818\u57df\u53c8\u5206\u6210\u5f88\u591a\u9580\u6d3e\uff0c\u5f9e\u6700\u65e9\u7684\u7b26\u865f\u908f\u8f2f\u3001\u5c08\u5bb6\u7cfb\u7d71\u958b\u59cb\u8aaa\u8d77\u3002\u65e9\u671f\u7684 AI 
\u662f\u5c07\u4eba\u985e\u7684\u5c08\u5bb6\u77e5\u8b58\u900f\u904e\u77e5\u8b58\u5eab\u8207\u898f\u5247\u5eab\u653e\u5230\u6a5f\u5668\u4eba\u7684\u5927\u8166\u4e2d\uff0c\u4e26\u8ce6\u4e88\u6a5f\u5668\u4eba\u667a\u6167\u4f7f\u5f97\u6709\u80fd\u529b\u5224\u65b7\u4e8b\u7269\u3002\u7576\u7136\u4eba\u985e\u5c08\u5bb6\u7684\u77e5\u8b58\u59cb\u7d42\u6709\u9650\uff0c\u96a8\u8457\u7db2\u8def\u8207\u500b\u4eba\u96fb\u8166\u666e\u53ca\u4e26\u9032\u5165\u4e86\u5927\u6578\u64da\u6642\u4ee3\u3002\u5404\u500b\u79d1\u5b78\u5bb6\u65bc\u662f\u958b\u59cb\u601d\u8003\u5982\u4f55\u5c07\u9019\u4e9b\u641c\u96c6\u4f86\u7684\u5927\u91cf\u6578\u64da\u9032\u884c\u61c9\u7528\u8207\u5206\u6790\uff1f\u6a5f\u5668\u5b78\u7fd2\u4e00\u8a5e\u5c31\u51fa\u73fe\u4e86\uff0c\u76ee\u6a19\u662f\u900f\u904e\u73fe\u5be6\u751f\u6d3b\u4e2d\u6240\u6536\u96c6\u7684\u8cc7\u6599\uff0c\u642d\u914d\u5404\u7a2e\u4e0d\u540c\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u8a13\u7df4\u51fa\u4f86\u4e00\u500b\u6a21\u578b\uff0c\u4f7f\u5f97\u6a5f\u5668\u4eba\u6709\u5224\u65b7\u8207\u9810\u6e2c\u7684\u80fd\u529b\u3002\u7576\u7136\u8fd1\u5e7e\u5e74\u71b1\u9580\u7684\u6df1\u5ea6\u5b78\u7fd2\u5176\u5be6\u50c5\u662f\u500b\u6a5f\u5668\u5b78\u7fd2\u88e1\u9762\u7684\u5176\u4e2d\u4e00\u7a2e\u5b78\u7fd2\u7684\u65b9\u6cd5\uff0c\u4ed6\u662f\u6a21\u4eff\u4eba\u985e\u7684\u795e\u7d93\u7cfb\u7d71\uff0c\u900f\u904e\u5927\u91cf\u7684\u795e\u7d93\u5143\u8207\u591a\u5c64\u7684\u795e\u7d93\u7db2\u8def\u5efa\u69cb\u51fa\u4f86\u7684\u8907\u96dc\u6578\u5b78\u6a21\u578b\u3002\u7136\u800c\u5728\u672c\u7cfb\u5217\u6559\u5b78\u4e2d\u6211\u5011\u6703\u5f9e\u6700\u57fa\u790e\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u958b\u59cb\u63d0\u8d77\uff0c\u4e26\u4e00\u6b65\u4e00\u6b65\u7684\u5e36\u9818\u8b80\u8005\u6210\u70ba\u4e00\u4f4d\u771f\u6b63\u7684\u8cc7\u6599\u79d1\u5b78\u5bb6\u3002","title":"\u4ec0\u9ebc\u662f\u4eba\u5de5\u667a\u6167?"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#_5","text":"\u8cc7\u6599\u79d1\u5b78\u4e3b\u8981\u900f\u904e
\u6a5f\u5668\u5b78\u7fd2\u7684\u6280\u8853\uff0c\u8b93\u6a5f\u5668\u53ef\u4ee5\u9810\u6e2c\u6216\u8005\u63a8\u8ad6\u3002\u5176\u4e2d\u9019\u5e7e\u5e74\u5f88\u592f\u7684\u8cc7\u6599\u79d1\u5b78\u5bb6\u9019\u4e00\u540d\u8a5e\u5176\u5be6\u662f\u7531\u4e09\u7a2e\u4eba\u6240\u7d44\u5408\u8d77\u4f86\u7684\u3002\u7b2c\u4e00\u500b\u662f\u6578\u5b78\u8207\u7d71\u8a08\u80cc\u666f\u7684\u4eba\uff0c\u4ed6\u5011\u80fd\u5920\u900f\u904e\u5c0d\u8cc7\u6599\u7684\u654f\u611f\u5ea6\u5f9e\u4e00\u5927\u7fa4\u539f\u59cb\u8cc7\u6599\u4e2d\u63a2\u7d22\u6709\u610f\u7fa9\u7684\u8cc7\u8a0a\u3002\u4e26\u8a2d\u8a08\u4e00\u5957\u9069\u5408\u7684\u6a21\u578b\u70ba\u9019\u4e00\u7fa4\u8cc7\u6599\u9032\u884c\u6578\u64da\u64ec\u5408\u3002\u7b2c\u4e8c\u7a2e\u4eba\u662f\u96fb\u8166\u79d1\u5b78\u80cc\u666f\u7684\u5de5\u7a0b\u5e2b\uff0c\u4ed6\u5011\u64c5\u9577\u7a0b\u5f0f\u8a9e\u8a00\u80fd\u5920\u5c07\u8907\u96dc\u7684\u6578\u5b78\u6a21\u578b\u5920\u904e\u7a0b\u5f0f\u5be6\u4f5c\u4e26\u4e14\u5354\u52a9\u843d\u5730\u6574\u5408\u3002\u7576\u7136\u73fe\u4eca\u6709\u975e\u5e38\u591a\u6a5f\u5668\u5b78\u7fd2\u7684\u5957\u4ef6\u4f8b\u5982 Sklearn\u3001TensorFlow......\u7b49\uff0c\u964d\u4f4e\u4e86\u5927\u5bb6\u5b78\u7fd2\u7684\u9580\u6abb\uff0c\u4e0d\u4e00\u5b9a\u662f\u8981\u7406\u5de5\u80cc\u666f\u7684\u4eba\u90fd\u53ef\u4ee5\u900f\u904e\u9019\u4e9b\u6a5f\u5668\u5b78\u7fd2\u5957\u4ef6\u4e00\u7aba\u4eba\u5de5\u667a\u6167\u7684\u5967\u79d8\u3002\u9664\u6b64\u4e4b\u5916 MLOps \u662f\u8fd1\u5e74\u4f86\u5ef6\u4f38\u51fa\u4f86\u7684\u65b0\u540d\u8a5e\uff0c\u5176\u5be6\u6982\u5ff5\u8207 DevOps \u985e\u4f3c\u4e26\u5c07\u9019\u4e00\u5957\u6a5f\u5236\u8907\u88fd\u5728\u6a5f\u5668\u5b78\u7fd2\u5c08\u6848\u4e0a\uff0c\u6211\u5011\u5e73\u6642\u6240\u57f7\u884c\u7684 AI 
\u5c08\u6848\u5fc5\u9808\u900f\u904e\u6301\u7e8c\u6027\u6574\u5408\u8207\u7dad\u904b\u7684\u89c0\u5ff5\u4e0d\u65b7\u7684\u9031\u671f\u6027\u66f4\u65b0\u5f9e\u6700\u65b0\u6536\u96c6\u5230\u5f97\u6578\u64da\u91cd\u65b0\u5b78\u7fd2\u6a21\u578b\u8d8a\u4f86\u8d8a\u8cbc\u8fd1\u4f7f\u7528\u8005\u3002\u6700\u5f8c\u4e00\u500b\u95dc\u9375\u7684\u4eba\u7269\u5c31\u662f\u5404\u884c\u5404\u696d\u7684\u9818\u57df\u5c08\u5bb6\uff0c\u56e0\u70ba AI \u518d\u4e5f\u4e0d\u662f\u8cc7\u8a0a\u80cc\u666f\u4eba\u7684\u5c08\u5229\u3002\u6211\u5011\u53ef\u4ee5\u5920\u904e AI \u89e3\u6c7a\u65e5\u5e38\u751f\u6d3b\u4e2d\u7684\u554f\u984c\uff0c\u56e0\u6b64\u6211\u5011\u5fc5\u9808\u8207\u9818\u57df\u5c08\u5bb6\u9032\u884c\u5408\u4f5c\u5354\u52a9\u8cc7\u6599\u6e05\u8207\u8207\u5efa\u7acb\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u3002\u7e3d\u4e4b\u8981\u6210\u70ba\u4e00\u500b\u597d\u7684\u8cc7\u79d1\u5b78\u5bb6\u4e0a\u8ff0\u4e09\u7a2e\u4eba\u7684\u7279\u6027\u7f3a\u4e00\u4e0d\u53ef\u3002","title":"\u8cc7\u6599\u79d1\u5b78 \u2715 \u4e09\u528d\u5ba2"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#_6","text":"\u6a5f\u5668\u5b78\u7fd2\u662f\u4e00\u7a2e\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\uff0c\u662f\u4e00\u7a2e\u5f9e\u8cc7\u6599\u4e2d\u53bb\u5b78\u7fd2\u4e26\u627e\u51fa\u89e3\u6c7a\u65b9\u6cd5\u3002\u5176\u4f9d\u7167\u6a5f\u5668\u5b78\u7fd2\u7684\u7a2e\u985e\u5927\u81f4\u53ef\u4ee5\u5206\u6210\u4ee5\u4e0b\u5e7e\u985e\uff1a \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2 \u7121\u9700\u6a19\u7c64/\u7b54\u6848\u5373\u53ef\u5b78\u7fd2 Ex: \u96c6\u7fa4 (Clustering) \u76e3\u7763\u5f0f\u5b78\u7fd2 \u9700\u8981\u6a19\u7c64/\u7b54\u6848\u624d\u80fd\u5b78\u7fd2 Ex: \u5206\u985e (Classification) \u3001 \u56de\u6b78 (Regression) \u534a\u76e3\u7763\u5f0f\u5b78\u7fd2 \u81ea\u76e3\u7763\u5b78\u7fd2 \u5f37\u5316\u5b78\u7fd2 
\u5982\u4f55\u64f7\u53d6\u597d\u7684\u7279\u5fb5\u662f\u5728\u6a5f\u5668\u5b78\u7fd2\u4e2d\u5f88\u91cd\u8981\u7684\u4e00\u4ef6\u4e8b","title":"\u6a5f\u5668\u5b78\u7fd2\u7a2e\u985e"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#_7","text":"\u7c21\u55ae\u4f86\u8aaa\u6a5f\u5668\u5b78\u7fd2\u5c31\u662f\u8981\u5f9e\u4e00\u5927\u7fa4\u8cc7\u6599\u7576\u4e2d\u627e\u51fa\u4e00\u500b\u6578\u5b78\u6a21\u578b\u3002\u9019\u500b\u6578\u5b78\u6a21\u578b\u53ef\u4ee5\u7a31\u4f5c\u662f\u4e00\u500b f(x)=y \u5176\u4e2d x \u70ba\u8f38\u5165\u7684\u8cc7\u6599\uff0cy \u70ba\u8a72\u7b46\u8cc7\u6599\u6240\u76f8\u5c0d\u61c9\u7684\u8f38\u51fa\u3002\u5176\u4e2d f \u5373\u70ba\u51fd\u6578\uff0c\u4e5f\u5c31\u662f\u4efb\u4e00\u7a2e\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u3002\u81f3\u65bc\u5178\u578b\u7684\u6a5f\u5668\u5b78\u7fd2\u6a21\u578b\u6709\u54ea\u4e9b\u5462\uff1f\u4f8b\u5982\u7dda\u6027\u8ff4\u6b78\u3001\u908f\u8f2f\u56de\u6b78\u3001KNN\u3001SVM\u3001\u6c7a\u7b56\u6a39\u3001\u96a8\u6a5f\u68ee\u6797\u3001XGBoost......\u7b49\u3002\u4e4b\u5f8c\u7684\u7cfb\u5217\u6587\u7ae0\u90fd\u6703\u4f9d\u5e8f\u5411\u5404\u4f4d\u89e3\u91cb\u3002","title":"\u5f9e\u4eba\u985e\u5b78\u7fd2\u5230\u6a5f\u5668\u5b78\u7fd2"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#_8","text":"\u4e00\u822c\u4f86\u8aaa\u8cc7\u6599\u53ef\u4ee5\u5206\u6210\u5169\u500b\u90e8\u5206\u3002\u4ee5\u4e00\u500b\u5206\u985e\u7684\u554f\u984c\u4f86\u8aaa\uff0c\u5206\u5225\u6709\u8f38\u5165\u7684\u7279\u5fb5\u4ee5\u53ca\u8a72\u7b46\u8cc7\u6599\u76f8\u5c0d\u61c9\u7684\u7b54\u6848\u7a31\u4f5c\u6a19\u8a18\u3002AI 
\u9019\u500b\u9818\u57df\u5c31\u662f\u8b93\u6a5f\u5668\u6709\u5b78\u7fd2\u89e3\u6c7a\u554f\u984c\u7684\u80fd\u529b\uff0c\u800c\u4e0d\u662f\u6211\u5011\u544a\u8a34\u4ed6\u61c9\u8a72\u600e\u9ebc\u89e3\u6c7a\u554f\u984c\u3002\u6211\u5011\u8209\u4e00\u500b\u7c21\u55ae\u7684\u4f8b\u5b50\uff0c\u5047\u8a2d\u6211\u5011\u9700\u8981\u9810\u6e2c\u660e\u5929\u662f\u5426\u6703\u4e0b\u96e8\u3002\u6211\u5011\u7684\u8f38\u5165\u7279\u5fb5\u5c31\u53ef\u4ee5\u6709\u5404\u500b\u89c0\u6e2c\u7ad9\u7684\u96f2\u91cf\u8207\u6eab\u6fd5\u5ea6\u4f5c\u70ba\u6a21\u578b\u8a13\u7df4\u7684\u8cc7\u6599\u3002\u800c\u6bcf\u4e00\u7b46\u7684\u5929\u6c23\u8cc7\u8a0a\u90fd\u5c0d\u61c9\u8457\u662f\u5426\u6703\u4e0b\u96e8\u7684\u6a19\u6e96\u7b54\u6848\u3002 \u7279\u5fb5 (Feature): \u7528\u4f86\u63cf\u8ff0\u6bcf\u4e00\u7b46\u8cc7\u6599\uff0c\u901a\u5e38\u6703\u7528 X \u4f86\u8868\u793a \u6a19\u8a18 (Label): \u7528\u4f86\u8868\u793a\u6bcf\u4e00\u7b46\u8cc7\u6599\u6240\u5c0d\u61c9\u7684\u8f38\u51fa\uff0c\u9019\u500b\u8f38\u51fa\u6a23\u5f0f\u53ef\u4ee5\u6709\u4e0d\u540c\u7684\u72c0\u614b(\u53ef\u80fd\u662f\u985e\u5225\u6216\u8005\u5be6\u6578\u503c\u7b49)\uff0c\u901a\u5e38\u6703\u7528 Y 
\u4f86\u8868\u793a\u3002","title":"\u4ec0\u9ebc\u662f\u8cc7\u6599?"},{"location":"5.\u6a5f\u5668\u5b78\u7fd2\u5927\u88dc\u5e16/#_9","text":"\u5b8c\u6574\u7684\u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b\u5927\u81f4\u5206\u6210\u516b\u500b\u6b65\u9a5f\u3002\u9996\u5148\u5b9a\u7fa9\u554f\u984c\uff0c\u7d93\u904e\u9700\u6c42\u8a0e\u8ad6\u8207\u8a55\u4f30\u5f8c\u6709\u500b\u660e\u78ba\u7684\u76ee\u6a19\u4e26\u958b\u59cb\u57f7\u884c\u5c08\u6848\u3002\u63a5\u8457\u958b\u59cb\u641c\u96c6\u8cc7\u6599\uff0c\u7531\u65bc\u5404\u5834\u57df\u6240\u6536\u96c6\u5230\u7684\u539f\u59cb\u6578\u64da\u53ef\u80fd\u5c1a\u672a\u6574\u7406\u4ee5\u53ca\u683c\u5f0f\u5c1a\u672a\u7d71\u4e00\u3002\u56e0\u6b64\u7b2c\u4e09\u6b65\u7684\u8cc7\u6599\u6e05\u7406\u6975\u70ba\u91cd\u8981\uff0c\u6709\u500b\u4e7e\u6de8\u7684\u8cc7\u6599\u53ef\u4ee5\u5c0d\u6a21\u578b\u8868\u73fe\u6709\u5927\u5e45\u7684\u63d0\u5347\u3002\u8cc7\u6599\u4e00\u5207\u5c31\u7dd2\u5f8c\u5efa\u8b70\u5728\u5efa\u6a21\u4e4b\u524d\u5148\u5c0d\u8cc7\u6599\u9032\u884c\u8996\u89ba\u5316\u5206\u6790\uff0c\u4e26\u70ba\u6578\u64da\u505a\u524d\u8655\u7406\u4ee5\u53ca\u5c08\u696d\u77e5\u8b58\u7684\u7279\u5fb5\u5de5\u7a0b\u3002\u5c0d\u8cc7\u6599\u6709\u521d\u6b65\u7684\u8a8d\u8b58\u5f8c\uff0c\u63a5\u8457\u6311\u9078\u5408\u9069\u7684\u6a5f\u5668\u5b78\u7fd2\u6f14\u7b97\u6cd5\u8a13\u7df4\u8207\u8a55\u4f30\u6a21\u578b\u3002\u5728\u6a21\u578b\u6b63\u5f0f\u4e0a\u7dda\u4e4b\u524d\uff0c\u5148\u900f\u904e\u6e2c\u8a66\u96c6\u6216\u662f\u4ea4\u53c9\u9a57\u8b49\u7b49\u6a5f\u5236\u78ba\u8a8d\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u3002\u6a21\u578b\u78ba\u8a8d\u6c92\u6709\u554f\u984c\u5f8c\u5373\u53ef\u5c07\u6a21\u578b\u6253\u5305\u8f38\u51fa\uff0c\u4e26\u4e14\u8207\u5be6\u969b\u5834\u57df\u61c9\u7528\u9032\u884c\u6574\u5408\u3002\u6700\u7d42\u5c31\u662f\u90e8\u7f72\u6a21\u578b\u4ee5\u53ca\u7dad\u904b\uff0c\u6301\u7e8c\u5c07\u5834\u57df\u8490\u96c6\u5230\u7684\u65b0\u8cc7\u6599\u9032\u884c\u518d\u8a13\u7df4\uff0c\u5f62\u6210\u4e00\u500b\u958b\u767c\u5faa\u74b0\u3002
 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u6a5f\u5668\u5b78\u7fd2\u6d41\u7a0b"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/","text":"[Day 6] \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2 K-means \u5206\u7fa4 \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2 \u4f55\u8b02\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2? \u96c6\u7fa4\u5206\u6790? \u5206\u7fa4\u6f14\u7b97\u6cd5\u4ecb\u7d39 K-means \u5206\u7fa4\u5206\u985e\u6f14\u7b97\u6cd5 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2(Un-supervised learning) \u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u6c92\u6709\u6240\u8b02\u7684\u6a19\u6e96\u7b54\u6848\uff0c\u6545\u6a5f\u5668\u6703\u81ea\u5df1\u5f9e\u8cc7\u6599\u7fa4\u4e2d\u627e\u51fa\u4e00\u5957\u5206\u7fa4\u7684\u6cd5\u5247\u3002\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u7684\u512a\u9ede\u662f\u4e0d\u9700\u8981\u4e8b\u5148\u4ee5\u4eba\u529b\u6a19\u7c64\uff0c\u53ea\u7d66\u5b9a\u7279\u5fb5\u8b93\u6a5f\u5668\u60f3\u8fa6\u6cd5\u6703\u5f9e\u4e2d\u627e\u51fa\u898f\u5f8b\u3002\u5e38\u898b\u7684\u975e\u76e3\u7763\u5f0f\u7684\u5206\u7fa4\u6f14\u7b97\u6cd5\u6709 K-means\uff0c\u5b83\u6839\u64da\u7269\u4ee5\u985e\u805a\u7684\u539f\u7406\u76ee\u6a19\u662f\u6839\u64da\u7279\u5fb5\u628a\u8cc7\u6599\u6a23\u672c\u5206\u70ba K \u7fa4\u3002\u5176\u4e2d\u5728\u8a13\u7df4\u6a21\u578b\u6642\u50c5\u9808\u5c0d\u6a5f\u5668\u63d0\u4f9b\u8f38\u5165\u7684\u7279\u5fb5\uff0c\u4e26\u5229\u7528\u5206\u7fa4\u6f14\u7b97\u6cd5\u81ea\u52d5\u5f9e\u9019\u4e9b\u7279\u5fb5\u4e2d\u627e\u51fa\u9130\u8fd1\u7684\u96c6\u7fa4\u4e2d\u5fc3\u4f5c\u70ba\u8a72\u985e\u5225\u3002 K-means \u6f14\u7b97\u6cd5 \u900f\u904e\u5206\u7fa4\u5206\u985e\u6f14\u7b97\u6cd5\u6211\u5011\u80fd\u5920\u5c07\u591a\u7a2e\u7dad\u5ea6\u7684\u8cc7\u6599\u9032\u884c\u5206\u985e\u3002K-means 
\u6f14\u7b97\u6cd5\u7684\u6982\u5ff5\u5f88\u7c21\u55ae\u4e5f\u975e\u5e38\u5bb9\u6613\u5be6\u4f5c\uff0c\u50c5\u4e00\u822c\u52a0\u6e1b\u4e58\u9664\u5c31\u597d\u4e0d\u9700\u8907\u96dc\u7684\u8a08\u7b97\u516c\u5f0f\u3002 \u521d\u59cb\u5316: \u6307\u5b9a K \u500b\u5206\u7fa4\uff0c\u4e26\u96a8\u6a5f\u6311\u9078 K \u500b\u8cc7\u6599\u9ede\u7684\u503c\u7576\u4f5c\u7fa4\u7d44\u4e2d\u5fc3\u503c \u5206\u914d\u8cc7\u6599\u9ede: \u5c07\u6bcf\u500b\u8cc7\u6599\u9ede\u8a2d\u70ba\u8ddd\u96e2\u6700\u8fd1\u7684\u4e2d\u5fc3 \u8a08\u7b97\u5e73\u5747\u503c: \u91cd\u65b0\u8a08\u7b97\u6bcf\u500b\u5206\u7fa4\u7684\u4e2d\u5fc3\u9ede \u91cd\u8907\u6b65\u9a5f2\u30013\uff0c\u76f4\u5230\u8cc7\u6599\u9ede\u4e0d\u518d\u8b8a\u63db\u7fa4\u7d44\u70ba\u6b62 [\u7a0b\u5f0f\u5be6\u4f5c] \u8f09\u5165\u76f8\u95dc\u5957\u4ef6 import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris 1) \u8f09\u5165\u8cc7\u6599\u96c6 \u6211\u5011\u4eca\u5929\u8981\u5be6\u4f5c\u5206\u7fa4\u5206\u985e\u7684\u554f\u984c\uff0c\u56e0\u6b64\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u975e\u5e38\u9069\u5408\u7576\u4f5c\u7bc4\u4f8b\u3002\u5176\u8cc7\u6599\u96c6\u8f09\u5165\u65b9\u5f0f\u5728\u7b2c\u56db\u5929\u6709\u63d0\u904e\uff0c\u662f\u4e00\u6a23\u7684\u5167\u5bb9\uff01 iris = load_iris () df_data = pd . DataFrame ( data = np . 
c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data K-Means K-means \u6f14\u7b97\u6cd5\u5728 Sklearn \u5957\u4ef6\u4e2d\u5df2\u7d93\u5e6b\u6211\u5011\u5c01\u88dd\u597d\u4e86\uff0c\u4f7f\u7528\u8005\u53ea\u8981\u547c\u53eb API \u5373\u53ef\u5c07\u5206\u7fa4\u5206\u985e\u6f14\u7b97\u6cd5\u5feb\u901f\u5be6\u4f5c\u3002 Parameters: - n_cluster: K\u7684\u5927\u5c0f\uff0c\u4e5f\u5c31\u662f\u5206\u7fa4\u7684\u985e\u5225\u6578\u91cf\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u8a2d\u5b9a\u5e38\u6578\u80fd\u5920\u4fdd\u8b49\u6bcf\u6b21\u5206\u7fa4\u7d50\u679c\u90fd\u4e00\u6a23\u3002 - n_init: \u9810\u8a2d\u70ba10\u6b21\u96a8\u6a5f\u521d\u59cb\u5316\uff0c\u9078\u64c7\u6548\u679c\u6700\u597d\u7684\u4e00\u7a2e\u4f86\u4f5c\u70ba\u6a21\u578b\u3002 - max_iter: \u8fed\u4ee3\u6b21\u6578\uff0c\u9810\u8a2d\u70ba300\u4ee3\u3002 Attributes: - inertia_: inertia_\uff1afloat\uff0c\u6bcf\u500b\u9ede\u5230\u5176\u4ed6\u53e2\u96c6\u7684\u8cea\u5fc3\u7684\u8ddd\u96e2\u4e4b\u548c\u3002 - cluster_centers_\uff1a \u7279\u5fb5\u7684\u4e2d\u5fc3\u9ede [n_clusters, n_features] \u3002 Methods: - fit: K\u500b\u96c6\u7fa4\u5206\u985e\u6a21\u578b\u8a13\u7df4\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u985e\u5225\u3002 - fit_predict: \u5148\u547c\u53ebfit()\u505a\u96c6\u7fa4\u5206\u985e\uff0c\u4e4b\u5f8c\u5728\u547c\u53ebpredict()\u9810\u6e2c\u6700\u7d42\u985e\u5225\u4e26\u56de\u50b3\u8f38\u51fa\u3002 - transform: \u56de\u50b3\u7684\u9663\u5217\u6bcf\u4e00\u884c\u662f\u6bcf\u4e00\u500b\u6a23\u672c\u5230kmeans\u4e2d\u5404\u500b\u4e2d\u5fc3\u9ede\u7684L2(\u6b50\u5e7e\u91cc\u5f97)\u8ddd\u96e2\u3002 - fit_transform: \u5148\u547c\u53ebfit()\u518d\u57f7\u884ctransform()\u3002 from sklearn.cluster import KMeans kmeansModel = KMeans ( n_clusters = 3 , random_state = 46 ) clusters_pred = kmeansModel . 
fit_predict ( X ) \u8a55\u4f30\u5206\u7fa4\u7d50\u679c \u4f7f\u7528\u8005\u8a2d\u5b9a K \u500b\u5206\u7fa4\u5f8c\uff0c\u8a72\u6f14\u7b97\u6cd5\u5feb\u901f\u7684\u627e\u5230 K \u500b\u4e2d\u5fc3\u9ede\u4e26\u5b8c\u6210\u5206\u7fa4\u5206\u985e\u3002\u64ec\u5408\u597d\u6a21\u578b\u5f8c\u6211\u5011\u53ef\u4ee5\u8a08\u7b97\u5404\u500b\u6a23\u672c\u5230\u8a72\u7fa4\u7684\u4e2d\u5fc3\u9ede\u7684\u8ddd\u96e2\u4e4b\u5e73\u65b9\u548c\uff0c\u7528\u4f86\u8a55\u4f30\u96c6\u7fa4\u7684\u6210\u6548\uff0c\u5176 inertia \u8d8a\u5927\u4ee3\u8868\u8d8a\u5dee\u3002 kmeansModel . inertia_ \u8f38\u51fa\u7d50\u679c\uff1a 78.94084142614602 \u82e5\u8981\u67e5\u770b\u5404\u7fa4\u96c6\u7684\u4e2d\u5fc3\u9ede\uff0c\u53ef\u4ee5\u53c3\u8003\u4ee5\u4e0b\u7a0b\u5f0f\u78bc\u3002 kmeansModel . cluster_centers_ \u8f38\u51fa\u7d50\u679c\uff1a array([[5.9016129 , 2.7483871 , 4.39354839, 1.43387097], [5.006 , 3.428 , 1.462 , 0.246 ], [6.85 , 3.07368421, 5.74210526, 2.07105263]]) \u5206\u985e\u7d50\u679c \u5982\u4f55\u6c7a\u5b9aK? 
\u7576\u4f60\u624b\u908a\u6709\u4e00\u7fa4\u8cc7\u6599\uff0c\u4e14\u7121\u6cd5\u4e00\u773c\u770b\u51fa\u6709\u591a\u5c11\u500b\u4e2d\u5fc3\u7684\u72c0\u6cc1\u3002\u53ef\u7528\u4f7f\u7528\u4e0b\u9762\u5169\u7a2e\u65b9\u6cd5\u505a k-means \u6a21\u578b\u8a55\u4f30\u3002 Inertia \u8a08\u7b97\u6240\u6709\u9ede\u5230\u6bcf\u7fa4\u96c6\u4e2d\u5fc3\u8ddd\u96e2\u7684\u5e73\u65b9\u548c\u3002 silhouette scores \u5074\u5f71\u51fd\u6578\u9a57\u8b49\u6578\u64da\u96c6\u7fa4\u5167\u4e00\u81f4\u6027\u7684\u65b9\u6cd5\u3002 \u4f7f\u7528 inertia \u505a\u6a21\u578b\u8a55\u4f30 \u7576K\u503c\u8d8a\u4f86\u8d8a\u5927\uff0cinertia \u6703\u96a8\u4e4b\u8d8a\u4f86\u8d8a\u5c0f\u3002\u6b63\u5e38\u60c5\u6cc1\u4e0b\u4e0d\u6703\u53d6K\u6700\u5927\u7684\uff0c\u4e00\u822c\u662f\u53d6 elbow point \u9644\u8fd1\u4f5c\u70ba K\uff0c\u5373 inertia \u8fc5\u901f\u4e0b\u964d\u8f49\u70ba\u5e73\u7de9\u7684\u90a3\u500b\u9ede\u3002 # k = 1~9 \u505a9\u6b21kmeans, \u4e26\u5c07\u6bcf\u6b21\u7d50\u679c\u7684inertia\u6536\u96c6\u5728\u4e00\u500blist\u88e1 kmeans_list = [ KMeans ( n_clusters = k , random_state = 46 ) . fit ( X ) for k in range ( 1 , 10 )] inertias = [ model . inertia_ for model in kmeans_list ] \u4f7f\u7528 silhouette scores \u505a\u6a21\u578b\u8a55\u4f30 \u53e6\u5916\u4e00\u500b\u65b9\u6cd5\u662f\u7528 silhouette scores \u53bb\u8a55\u4f30\uff0c\u5176\u5206\u6578\u8d8a\u5927\u4ee3\u8868\u5206\u7fa4\u6548\u679c\u8d8a\u597d\u3002 from sklearn.metrics import silhouette_score silhouette_scores = [ silhouette_score ( X , model . 
labels_ ) for model in kmeans_list [ 1 :]] \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 6] \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2 K-means \u5206\u7fa4"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#day-6-k-means","text":"","title":"[Day 6] \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2 K-means \u5206\u7fa4"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#_1","text":"\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2 \u4f55\u8b02\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2? \u96c6\u7fa4\u5206\u6790? \u5206\u7fa4\u6f14\u7b97\u6cd5\u4ecb\u7d39 K-means \u5206\u7fa4\u5206\u985e\u6f14\u7b97\u6cd5 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#un-supervised-learning","text":"\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u6c92\u6709\u6240\u8b02\u7684\u6a19\u6e96\u7b54\u6848\uff0c\u6545\u6a5f\u5668\u6703\u81ea\u5df1\u5f9e\u8cc7\u6599\u7fa4\u4e2d\u627e\u51fa\u4e00\u5957\u5206\u7fa4\u7684\u6cd5\u5247\u3002\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2\u7684\u512a\u9ede\u662f\u4e0d\u9700\u8981\u4e8b\u5148\u4ee5\u4eba\u529b\u6a19\u7c64\uff0c\u53ea\u7d66\u5b9a\u7279\u5fb5\u8b93\u6a5f\u5668\u60f3\u8fa6\u6cd5\u6703\u5f9e\u4e2d\u627e\u51fa\u898f\u5f8b\u3002\u5e38\u898b\u7684\u975e\u76e3\u7763\u5f0f\u7684\u5206\u7fa4\u6f14\u7b97\u6cd5\u6709 K-means\uff0c\u5b83\u6839\u64da\u7269\u4ee5\u985e\u805a\u7684\u539f\u7406\u76ee\u6a19\u662f\u6839\u64da\u7279\u5fb5\u628a\u8cc7\u6599\u6a23\u672c\u5206\u70ba K \u7fa4\u3002\u5176\u4e2d\u5728\u8a13\u7df4\u6a21\u578b\u6642\u50c5\u9808\u5c0d\u6a5f\u5668\u63d0\u4f9b\u8f38\u5165\u7684\u7279\u5fb5\uff0c\u4e26\u5229\u7528\u5206\u7fa4\u6f14\u7b97\u6cd5\u81ea\u52d5\u5f9e\u9019\u4e9b\u7279\u5fb5\u4e2d\u627e\u51fa\u9130\u8fd1\u7684\u96c6\u7fa4\u4e2d\u5fc3\u4f5c\u70ba\u8a72\u985e\u5225\u3002","title":"\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2(Un-supervised 
learning)"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#k-means","text":"\u900f\u904e\u5206\u7fa4\u5206\u985e\u6f14\u7b97\u6cd5\u6211\u5011\u80fd\u5920\u5c07\u591a\u7a2e\u7dad\u5ea6\u7684\u8cc7\u6599\u9032\u884c\u5206\u985e\u3002K-means \u6f14\u7b97\u6cd5\u7684\u6982\u5ff5\u5f88\u7c21\u55ae\u4e5f\u975e\u5e38\u5bb9\u6613\u5be6\u4f5c\uff0c\u50c5\u4e00\u822c\u52a0\u6e1b\u4e58\u9664\u5c31\u597d\u4e0d\u9700\u8907\u96dc\u7684\u8a08\u7b97\u516c\u5f0f\u3002 \u521d\u59cb\u5316: \u6307\u5b9a K \u500b\u5206\u7fa4\uff0c\u4e26\u96a8\u6a5f\u6311\u9078 K \u500b\u8cc7\u6599\u9ede\u7684\u503c\u7576\u4f5c\u7fa4\u7d44\u4e2d\u5fc3\u503c \u5206\u914d\u8cc7\u6599\u9ede: \u5c07\u6bcf\u500b\u8cc7\u6599\u9ede\u8a2d\u70ba\u8ddd\u96e2\u6700\u8fd1\u7684\u4e2d\u5fc3 \u8a08\u7b97\u5e73\u5747\u503c: \u91cd\u65b0\u8a08\u7b97\u6bcf\u500b\u5206\u7fa4\u7684\u4e2d\u5fc3\u9ede \u91cd\u8907\u6b65\u9a5f2\u30013\uff0c\u76f4\u5230\u8cc7\u6599\u9ede\u4e0d\u518d\u8b8a\u63db\u7fa4\u7d44\u70ba\u6b62","title":"K-means \u6f14\u7b97\u6cd5"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#_2","text":"","title":"[\u7a0b\u5f0f\u5be6\u4f5c]"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#_3","text":"import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.datasets import load_iris","title":"\u8f09\u5165\u76f8\u95dc\u5957\u4ef6"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#1","text":"\u6211\u5011\u4eca\u5929\u8981\u5be6\u4f5c\u5206\u7fa4\u5206\u985e\u7684\u554f\u984c\uff0c\u56e0\u6b64\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u975e\u5e38\u9069\u5408\u7576\u4f5c\u7bc4\u4f8b\u3002\u5176\u8cc7\u6599\u96c6\u8f09\u5165\u65b9\u5f0f\u5728\u7b2c\u56db\u5929\u6709\u63d0\u904e\uff0c\u662f\u4e00\u6a23\u7684\u5167\u5bb9\uff01 iris = load_iris () df_data = pd . DataFrame ( data = np . 
c_ [ iris [ 'data' ], iris [ 'target' ]], columns = [ 'SepalLengthCm' , 'SepalWidthCm' , 'PetalLengthCm' , 'PetalWidthCm' , 'Species' ]) df_data","title":"1) \u8f09\u5165\u8cc7\u6599\u96c6"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#k-means_1","text":"K-means \u6f14\u7b97\u6cd5\u5728 Sklearn \u5957\u4ef6\u4e2d\u5df2\u7d93\u5e6b\u6211\u5011\u5c01\u88dd\u597d\u4e86\uff0c\u4f7f\u7528\u8005\u53ea\u8981\u547c\u53eb API \u5373\u53ef\u5c07\u5206\u7fa4\u5206\u985e\u6f14\u7b97\u6cd5\u5feb\u901f\u5be6\u4f5c\u3002 Parameters: - n_cluster: K\u7684\u5927\u5c0f\uff0c\u4e5f\u5c31\u662f\u5206\u7fa4\u7684\u985e\u5225\u6578\u91cf\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\uff0c\u8a2d\u5b9a\u5e38\u6578\u80fd\u5920\u4fdd\u8b49\u6bcf\u6b21\u5206\u7fa4\u7d50\u679c\u90fd\u4e00\u6a23\u3002 - n_init: \u9810\u8a2d\u70ba10\u6b21\u96a8\u6a5f\u521d\u59cb\u5316\uff0c\u9078\u64c7\u6548\u679c\u6700\u597d\u7684\u4e00\u7a2e\u4f86\u4f5c\u70ba\u6a21\u578b\u3002 - max_iter: \u8fed\u4ee3\u6b21\u6578\uff0c\u9810\u8a2d\u70ba300\u4ee3\u3002 Attributes: - inertia_: inertia_\uff1afloat\uff0c\u6bcf\u500b\u9ede\u5230\u5176\u4ed6\u53e2\u96c6\u7684\u8cea\u5fc3\u7684\u8ddd\u96e2\u4e4b\u548c\u3002 - cluster_centers_\uff1a \u7279\u5fb5\u7684\u4e2d\u5fc3\u9ede [n_clusters, n_features] \u3002 Methods: - fit: K\u500b\u96c6\u7fa4\u5206\u985e\u6a21\u578b\u8a13\u7df4\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u985e\u5225\u3002 - fit_predict: \u5148\u547c\u53ebfit()\u505a\u96c6\u7fa4\u5206\u985e\uff0c\u4e4b\u5f8c\u5728\u547c\u53ebpredict()\u9810\u6e2c\u6700\u7d42\u985e\u5225\u4e26\u56de\u50b3\u8f38\u51fa\u3002 - transform: \u56de\u50b3\u7684\u9663\u5217\u6bcf\u4e00\u884c\u662f\u6bcf\u4e00\u500b\u6a23\u672c\u5230kmeans\u4e2d\u5404\u500b\u4e2d\u5fc3\u9ede\u7684L2(\u6b50\u5e7e\u91cc\u5f97)\u8ddd\u96e2\u3002 - fit_transform: \u5148\u547c\u53ebfit()\u518d\u57f7\u884ctransform()\u3002 from sklearn.cluster import KMeans kmeansModel = KMeans ( n_clusters = 3 , random_state = 46 ) 
clusters_pred = kmeansModel . fit_predict ( X )","title":"K-Means"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#_4","text":"\u4f7f\u7528\u8005\u8a2d\u5b9a K \u500b\u5206\u7fa4\u5f8c\uff0c\u8a72\u6f14\u7b97\u6cd5\u5feb\u901f\u7684\u627e\u5230 K \u500b\u4e2d\u5fc3\u9ede\u4e26\u5b8c\u6210\u5206\u7fa4\u5206\u985e\u3002\u64ec\u5408\u597d\u6a21\u578b\u5f8c\u6211\u5011\u53ef\u4ee5\u8a08\u7b97\u5404\u500b\u6a23\u672c\u5230\u8a72\u7fa4\u7684\u4e2d\u5fc3\u9ede\u7684\u8ddd\u96e2\u4e4b\u5e73\u65b9\u548c\uff0c\u7528\u4f86\u8a55\u4f30\u96c6\u7fa4\u7684\u6210\u6548\uff0c\u5176 inertia \u8d8a\u5927\u4ee3\u8868\u8d8a\u5dee\u3002 kmeansModel . inertia_ \u8f38\u51fa\u7d50\u679c\uff1a 78.94084142614602 \u82e5\u8981\u67e5\u770b\u5404\u7fa4\u96c6\u7684\u4e2d\u5fc3\u9ede\uff0c\u53ef\u4ee5\u53c3\u8003\u4ee5\u4e0b\u7a0b\u5f0f\u78bc\u3002 kmeansModel . cluster_centers_ \u8f38\u51fa\u7d50\u679c\uff1a array([[5.9016129 , 2.7483871 , 4.39354839, 1.43387097], [5.006 , 3.428 , 1.462 , 0.246 ], [6.85 , 3.07368421, 5.74210526, 2.07105263]])","title":"\u8a55\u4f30\u5206\u7fa4\u7d50\u679c"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#_5","text":"","title":"\u5206\u985e\u7d50\u679c"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#k","text":"\u7576\u4f60\u624b\u908a\u6709\u4e00\u7fa4\u8cc7\u6599\uff0c\u4e14\u7121\u6cd5\u4e00\u773c\u770b\u51fa\u6709\u591a\u5c11\u500b\u4e2d\u5fc3\u7684\u72c0\u6cc1\u3002\u53ef\u7528\u4f7f\u7528\u4e0b\u9762\u5169\u7a2e\u65b9\u6cd5\u505a k-means \u6a21\u578b\u8a55\u4f30\u3002 Inertia \u8a08\u7b97\u6240\u6709\u9ede\u5230\u6bcf\u7fa4\u96c6\u4e2d\u5fc3\u8ddd\u96e2\u7684\u5e73\u65b9\u548c\u3002 silhouette scores \u5074\u5f71\u51fd\u6578\u9a57\u8b49\u6578\u64da\u96c6\u7fa4\u5167\u4e00\u81f4\u6027\u7684\u65b9\u6cd5\u3002","title":"\u5982\u4f55\u6c7a\u5b9aK?"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#inertia","text":"\u7576K\u503c\u8d8a\u4f86\u8d8a\u5927\uff0cinertia 
\u6703\u96a8\u4e4b\u8d8a\u4f86\u8d8a\u5c0f\u3002\u6b63\u5e38\u60c5\u6cc1\u4e0b\u4e0d\u6703\u53d6K\u6700\u5927\u7684\uff0c\u4e00\u822c\u662f\u53d6 elbow point \u9644\u8fd1\u4f5c\u70ba K\uff0c\u5373 inertia \u8fc5\u901f\u4e0b\u964d\u8f49\u70ba\u5e73\u7de9\u7684\u90a3\u500b\u9ede\u3002 # k = 1~9 \u505a9\u6b21kmeans, \u4e26\u5c07\u6bcf\u6b21\u7d50\u679c\u7684inertia\u6536\u96c6\u5728\u4e00\u500blist\u88e1 kmeans_list = [ KMeans ( n_clusters = k , random_state = 46 ) . fit ( X ) for k in range ( 1 , 10 )] inertias = [ model . inertia_ for model in kmeans_list ]","title":"\u4f7f\u7528 inertia \u505a\u6a21\u578b\u8a55\u4f30"},{"location":"6.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2k-means\u5206\u7fa4/#silhouette-scores","text":"\u53e6\u5916\u4e00\u500b\u65b9\u6cd5\u662f\u7528 silhouette scores \u53bb\u8a55\u4f30\uff0c\u5176\u5206\u6578\u8d8a\u5927\u4ee3\u8868\u5206\u7fa4\u6548\u679c\u8d8a\u597d\u3002 from sklearn.metrics import silhouette_score silhouette_scores = [ silhouette_score ( X , model . labels_ ) for model in kmeans_list [ 1 :]] \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u4f7f\u7528 silhouette scores \u505a\u6a21\u578b\u8a55\u4f30"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/","text":"[Day 7] \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u964d\u7dad\u89c0\u5ff5 \u4f55\u8b02\u964d\u7dad? \u964d\u7dad\u6709\u4ec0\u9ebc\u512a\u9ede? 
\u5e38\u898b\u5169\u7a2e\u964d\u7dad\u65b9\u6cd5 PCA & t-SNE \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u964d\u7dad (Dimension Reduction) \u4e00\u822c\u8cc7\u6599\u5e38\u898b\u7684\u8868\u793a\u65b9\u6cd5\u6709\u4e00\u7dad(\u6578\u7dda)\u3001\u4e8c\u7dad(XY\u5e73\u9762)\u548c\u4e09\u7dad(XYZ\u7acb\u9ad4)\u3002\u7576\u5927\u65bc\u4e09\u7dad\u7684\u8cc7\u6599\u5c31\u96e3\u4ee5\u8996\u89ba\u5316\u5448\u73fe\uff0c\u90a3\u9ebc\u6211\u5011\u8a72\u5982\u4f55\u8868\u793a\u9ad8\u7dad\u5ea6\u7684\u8cc7\u6599\u540c\u6642\u53c8\u4e0d\u80fd\u58d3\u7e2e\u539f\u672c\u8cc7\u6599\u9593\u5f7c\u6b64\u7684\u95dc\u9023\u6027\u5462\uff1f\u9019\u6642\u964d\u7dad\u5c31\u80fd\u5e6b\u52a9\u4f60\u4e86\uff01\u964d\u7dad\u9867\u540d\u601d\u7fa9\uff0c\u5c31\u662f\u539f\u672c\u7684\u8cc7\u6599\u8655\u65bc\u5728\u4e00\u500b\u6bd4\u8f03\u9ad8\u7684\u7dad\u5ea6\u4f5c\u6a19\u4e0a\uff0c\u6211\u5011\u5e0c\u671b\u627e\u5230\u4e00\u500b\u4f4e\u7dad\u5ea6\u7684\u4f5c\u6a19\u4f86\u63cf\u8ff0\u5b83\uff0c\u4f46\u53c8\u4e0d\u80fd\u5931\u53bb\u8cc7\u6599\u672c\u8eab\u7684\u7279\u8cea\u3002 \u70ba\u4ec0\u9ebc\u8981\u964d\u7dad? 
\u60f3\u60f3\u770b\u5982\u679c\u6211\u5011\u80fd\u5920\u628a\u4e00\u4e9b\u8cc7\u6599\u505a\u58d3\u7e2e\uff0c\u540c\u6642\u53c8\u80fd\u5920\u4fdd\u6301\u8cc7\u6599\u539f\u4f86\u7684\u7279\u6027\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u7528\u6bd4\u8f03\u5c11\u7684\u7a7a\u9593\uff0c\u6216\u662f\u8a08\u7b97\u6642\u7528\u6bd4\u8f03\u5c11\u7684\u8cc7\u6e90\u5c31\u53ef\u4ee5\u5f97\u5230\u8ddf\u6c92\u6709\u505a\u8cc7\u6599\u58d3\u7e2e\u4e4b\u524d\u5f97\u5230\u76f8\u4f3c\u7684\u7d50\u679c\u3002\u6b64\u5916\u8cc7\u6599\u964d\u7dad\u53ef\u4ee5\u5e6b\u52a9\u6211\u5011\u9032\u884c\u8cc7\u6599\u8996\u89ba\u5316\uff0c\u4e8c\u7dad\u53ef\u4ee5\u7528\u5e73\u9762\u5716\u8868\u793a\u3001\u4e09\u7dad\u53ef\u4ee5\u7528\u7acb\u9ad4\u5716\u4f5c\u8868\u793a\uff0c\u800c\u5927\u65bc\u4e09\u7dad\u7684\u7a7a\u9593\u96e3\u4ee5\u8996\u89ba\u5316\u505a\u5448\u73fe\u3002 \u964d\u7dad\u6f14\u7b97\u6cd5 \u5e38\u898b\u7684\u964d\u7dad\u65b9\u6cd5\u6709\u5169\u7a2e\u5206\u5225\u6709\u7dda\u6027\u65b9\u6cd5\u7684\u4e3b\u6210\u5206\u5206\u6790(PCA)\u4ee5\u53ca\u975e\u7dda\u6027\u7684 t-\u96a8\u6a5f\u9130\u8fd1\u5d4c\u5165\u6cd5(t-SNE)\u3002\u4e0b\u5716\u4f8b\u5b50\u662f\u5c07 28*28 \u5927\u5c0f\u7684\u624b\u5beb\u6578\u5b57\u7167\u7247\uff0c\u5206\u5225\u900f\u904e\u4e0a\u8ff0\u5169\u7a2e\u964d\u7dad\u65b9\u6cd5\u5c07\u4e00\u5f35 784 \u500b\u50cf\u7d20\u7684\u5f71\u50cf\u964d\u6210 2 \u7dad\u4e26\u6295\u5c04\u5728\u5e73\u9762\u5ea7\u6a19\u4e0a\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe PCA \u964d\u70ba\u5f8c\u53ef\u4ee5\u5927\u81f4\u5c07 0~9 \u7684\u624b\u5beb\u6578\u5b57\u7167\u7247\u5728\u5e73\u9762\u4e0a\u5206\u6210\u5341\u7fa4\uff0c\u4e0d\u904e\u5f7c\u6b64\u9593\u7684\u754c\u7dda\u9084\u662f\u5f88\u6a21\u7cca\u3002\u800c\u6211\u5011\u900f\u904e t-SNE 
\u65b9\u6cd5\u964d\u70ba\u5f8c\u53ef\u4ee5\u770b\u5230\u5e73\u9762\u4e0a\u5f88\u6e05\u695a\u7684\u5c07\u9019\u5341\u500b\u6578\u5b57\u5206\u6210\u5341\u7fa4\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u5f97\u77e5\u624b\u5beb\u6578\u5b57\u7684\u5f71\u50cf\u5728\u975e\u7dda\u6027\u7684\u964d\u7dad\u8f49\u63db\u6548\u679c\u662f\u6bd4\u8f03\u597d\u7684\u3002 Principal component analysis (PCA) T-Distributed Stochastic Neighbor Embedding (t-SNE) \u56e0\u70ba t-SNE \u5141\u8a31\u975e\u7dda\u6027\u7684\u8f49\u63db\uff0c\u6b64\u5916 t-SNE \u4f7f\u7528\u4e86\u66f4\u8907\u96dc\u7684\u516c\u5f0f\u4f86\u8868\u9054\u9ad8\u7dad\u8207\u4f4e\u7dad\u4e4b\u9593\u7684\u95dc\u4fc2\u3002\u56e0\u6b64\u5728\u9019\u7a2e 0~9 \u6709\u5341\u500b\u5206\u985e\u7684\u60c5\u6cc1\u4e0b\u53ef\u4ee5\u78ba\u4fdd\u5f7c\u6b64\u9593\u7684\u8ddd\u96e2\u6703\u88ab\u5340\u9694\u8a72\u800c\u4e0d\u6703\u91cd\u758a\u3002 Principal component analysis (PCA) \u4e3b\u6210\u4efd\u5206\u6790(Principal component analysis, PCA)\u3002\u5176\u4e3b\u8981\u76ee\u7684\u662f\u628a\u9ad8\u7dad\u7684\u9ede\u982d\u5f71\u5230\u4f4e\u7dad\u7684\u7a7a\u9593\u4e0a\uff0c\u4e26\u4e14\u4f4e\u7dad\u5ea6\u7684\u7a7a\u9593\u4fdd\u6709\u9ad8\u7dad\u7a7a\u9593\u4e2d\u5927\u90e8\u5206\u7684\u6027\u8cea\u3002\u900f\u904e\u5c07\u4e00\u500b\u5177\u6709 n \u500b\u7279\u5fb5\u7a7a\u9593\u7684\u6a23\u672c\uff0c\u8f49\u63db\u70ba\u5177\u6709 k \u500b\u7279\u5fb5\u7a7a\u9593\u7684\u6a23\u672c\uff0c\u5176\u4e2d k \u5fc5\u5b9a\u8981\u5c0f\u65bc n\u3002\u6b64\u5916 PCA \u53ea\u5141\u8a31\u7dda\u6027\u7684\u8f49\u63db\u3002\u5982\u4e0b\u5716\u6240\u793a\uff0c\u6211\u5011\u5c07\u6350\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u9032\u884c PCA 
\u964d\u7dad\u3002\u5c07\u539f\u6709\u56db\u500b\u7279\u5fb5\u5206\u5225\u6709\u82b1\u74e3\u8207\u82b1\u843c\u7684\u9577\u8207\u5bec\uff0c\u900f\u904e\u7dda\u6027\u8f49\u63db\u6210\u5169\u7dad\u4e26\u6295\u5c04\u5728\u5e73\u9762\u4e0a\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u4e09\u7a2e\u82b1\u7684\u985e\u5225\u5728\u5e73\u9762\u4e0a\u5404\u81ea\u90fd\u6709\u7dda\u6027\u7684\u8da8\u52e2\uff0c\u4e5f\u5c31\u662f\u5716\u4e2d\u7d05\u8272\u7684\u7dda\u689d\u3002 PCA\u7684\u4e3b\u8981\u6b65\u9a5f \u9996\u5148\u4e00\u958b\u59cb\u5148\u6c42\u51fa\u6240\u6709\u8cc7\u6599\u9ede\u4e2d\u5fc3 \u00b5\uff0c\u4e5f\u5c31\u662f\u5c07\u6bcf\u4e00\u500b\u8cc7\u6599\u9ede\u7684\u5e73\u5747\u3002\u63a5\u8457\u5c07\u6bcf\u4e00\u500b\u8cc7\u6599\u9ede\u6e1b\u53bb \u00b5\uff0c\u4e5f\u5c31\u662f\u505a\u8cc7\u6599\u9ede\u7684\u5e73\u79fb\uff0c\u5e73\u79fb\u5f8c\u539f\u9ede\u662f\u6240\u6709\u9ede\u7684\u4e2d\u5fc3\u3002\u7b2c\u4e09\u6b65\u8a08\u7b97\u7279\u5fb5\u5354\u65b9\u5dee\u77e9\u9663\uff0c\u5176\u4e2d\u77e9\u9663\u5c0d\u89d2\u7dda\u4e0a\u5206\u5225\u662f\u6bcf\u500b\u7279\u5fb5\u7684\u65b9\u5dee\uff0c\u800c\u975e\u5c0d\u89d2\u7dda\u4e0a\u7684\u6578\u503c\u662f\u4e0d\u540c\u7279\u5fb5\u9593\u5f7c\u6b64\u7684\u5354\u65b9\u5dee\u3002\u5354\u65b9\u5dee\u662f\u8861\u91cf\u5169\u500b\u8b8a\u6578\u540c\u6642\u8b8a\u5316\u7684\u8b8a\u5316\u7a0b\u5ea6\uff0c\u5354\u65b9\u5dee\u7d55\u5c0d\u503c\u8d8a\u5927\u5169\u8005\u5c0d\u5f7c\u6b64\u7684\u5f71\u97ff\u8d8a\u5927\u3002\u7b2c\u56db\u6b65\u9a5f\u5c0d\u77e9\u9663\u9032\u884c\u7279\u5fb5\u503c\u5206\u89e3\uff0c\u8a08\u7b97\u5354\u65b9\u5dee\u77e9\u9663\u7684\u7279\u5fb5\u5411\u91cf\u548c\u7279\u5fb5\u503c\u4e26\u9078\u53d6\u7279\u5fb5\u5411\u91cf\u3002\u7b2c\u4e94\u6b65\u9a5f\u5c07\u7279\u5fb5\u503c\u7531\u5c0f\u5230\u5927\u6392\u5e8f\uff0c\u4e26\u9078\u53d6\u5176\u4e2d\u6700\u5927\u7684 k \u500b\u7279\u5fb5\u3002\u7136\u5f8c\u5c07\u9019\u4e9b k 
\u500b\u7279\u5fb5\u5411\u91cf\u4f5c\u70ba\u7279\u5fb5\u5411\u91cf\u77e9\u9663\u3002\u6700\u5f8c\u5c0d\u8cc7\u6599\u96c6\u4e2d\u7684\u6bcf\u4e00\u500b\u7279\u5fb5\u8f49\u63db\u70ba\u65b0\u7684\u7279\u5fb5\u3002 \u5148\u6c42\u51fa\u6240\u6709\u8cc7\u6599\u9ede\u4e2d\u5fc3 \u00b5 \u5c07\u6bcf\u4e00\u500b\u8cc7\u6599\u9ede\u6e1b\u53bb \u00b5 \u8a08\u7b97\u7279\u5fb5\u7684\u5354\u65b9\u5dee\u77e9\u9663 \u5c0d\u77e9\u9663\u9032\u884c\u7279\u5fb5\u503c\u5206\u89e3 \u53d6\u51fa\u6700\u5927\u7684 k \u500b\u7279\u5fb5\u503c\u5c0d\u61c9\u7684\u7279\u5fb5\u5411\u91cf \u5c07\u8cc7\u6599\u9ede\u6295\u5f71\u5230\u9078\u53d6\u7684\u7279\u5fb5\u5411\u91cf\u4e0a T-Distributed Stochastic Neighbor Embedding (t-SNE) t-SNE \u76ee\u6a19\u8ddf PCA \u662f\u4e00\u6a23\u7684\uff0c\u5b83\u5011\u90fd\u5e0c\u671b\u628a\u9ad8\u7dad\u7684\u8cc7\u6599\u6295\u5f71\u5230\u4f4e\u7dad\u4e2d\uff0c\u4e26\u4e14\u4fdd\u7559\u9ad8\u7dad\u4e2d\u7684\u9ede\u8207\u9ede\u4e4b\u9593\u7684\u95dc\u4fc2\u8207\u7279\u6027\u3002\u5169\u8005\u4e0d\u540c\u7684\u9ede\u5728\u65bc t-SNE \u5141\u8a31\u975e\u7dda\u6027\u7684\u8f49\u63db\u3002\u56e0\u70ba t-SNE \u4f7f\u7528\u4e86\u66f4\u8907\u96dc\u7684\u516c\u5f0f\u4f86\u8868\u9054\u9ad8\u7dad\u8207\u4f4e\u7dad\u4e4b\u9593\u7684\u95dc\u4fc2\u3002\u4e3b\u8981\u662f\u5c07\u9ad8\u7dad\u7684\u6578\u64da\u7528\u9ad8\u65af\u5206\u4f48\u7684\u6a5f\u7387\u5bc6\u5ea6\u51fd\u6578\u8fd1\u4f3c\uff0c\u800c\u4f4e\u7dad\u6578\u64da\u7684\u90e8\u5206\u4f7f\u7528 t \u5206\u4f48\u7684\u65b9\u5f0f\u4f86\u8fd1\u4f3c\u3002 PCA & t-SNE \u6574\u7406 PCA\u548ct-SNE\u662f\u5169\u500b\u4e0d\u540c\u964d\u7dad\u7684\u65b9\u6cd5\uff0cPCA\u7684\u512a\u9ede\u5728\u65bc\u7c21\u55ae\u82e5\u65b0\u7684\u9ede\u8981\u6620\u5c04\u6642\u76f4\u63a5\u4ee3\u5165\u516c\u5f0f\u5373\u53ef\u5f97\u51fa\u964d\u7dad\u5f8c\u7684\u9ede\u3002\u82e5t-SNE\u6709\u65b0\u7684\u9ede\u8fd1\u4f86\u6642\u6211\u5011\u6c92\u6709\u53bb\u8a08\u7b97\u65b0\u7684\u9ede\u548c\u820a\u7684\u9ede\u4e4b\u9593\u7684\u95dc\u4fc2\u56e0\u6b64 
\u6211\u5011\u7121\u6cd5\u5c07\u65b0\u7684\u9ede\u6295\u5f71\u4e0b\u53bb\u3002t-SNE\u7684\u512a\u9ede\u662f\u53ef\u4ee5\u4fdd\u7559\u539f\u672c\u9ad8\u7dad\u8ddd\u96e2\u8f03\u9060\u7684\u9ede\u964d\u7dad\u5f8c\u4f9d\u7136\u4fdd\u6301 \u9060\u7684\u8ddd\u96e2\uff0c\u56e0\u6b64\u9019\u4e9b\u7fa4\u964d\u7dad\u5f8c\u4f9d\u7136\u4fdd\u6301\u7fa4\u7684\u7279\u6027\u3002 PCA\u5141\u8a31\u7dda\u6027\u7684\u8f49\u63db t-SNE\u5141\u8a31\u975e\u7dda\u6027\u7684\u8f49\u63db [\u7a0b\u5f0f\u5be6\u4f5c] PCA from sklearn.decomposition import PCA pca = PCA ( n_components = 2 , iterated_power = 1 ) train_reduced = pca . fit_transform ( X_train ) print ( 'PCA\u65b9\u5dee\u6bd4: ' , pca . explained_variance_ratio_ ) print ( 'PCA\u65b9\u5dee\u503c:' , pca . explained_variance_ ) t-SNE from sklearn.manifold import TSNE tsneModel = TSNE ( n_components = 2 , random_state = 42 , n_iter = 1000 ) train_reduced = tsneModel . fit_transform ( X_train ) t-SNE \u4e0d\u9069\u7528\u65bc\u65b0\u8cc7\u6599\u3002PCA \u964d\u7dad\u53ef\u4ee5\u9069\u7528\u65b0\u8cc7\u6599\uff0c\u53ef\u547c\u53ebtransform() \u51fd\u5f0f\u5373\u53ef\u3002\u800c t-SNE \u5247\u4e0d\u884c\u3002\u56e0\u70ba\u6f14\u7b97\u6cd5\u7684\u95dc\u4fc2\u5728 scikit-learn \u5957\u4ef6\u4e2d\u7684 t-SNE \u6f14\u7b97\u6cd5\u4e26\u6c92\u6709transform() \u51fd\u5f0f\u53ef\u4ee5\u547c\u53eb\u3002 Reference \u6df1\u5165\u5b78\u7fd2\u4e3b\u6210\u5206\u5206\u6790\uff08PCA\uff09\u6f14\u7b97\u6cd5\u539f\u7406\u53ca\u5176Python\u5be6\u73fe \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 7] \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#day-7-","text":"","title":"[Day 7] \u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#_1","text":"\u964d\u7dad\u89c0\u5ff5 \u4f55\u8b02\u964d\u7dad? 
\u964d\u7dad\u6709\u4ec0\u9ebc\u512a\u9ede? \u5e38\u898b\u5169\u7a2e\u964d\u7dad\u65b9\u6cd5 PCA & t-SNE \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#dimension-reduction","text":"\u4e00\u822c\u8cc7\u6599\u5e38\u898b\u7684\u8868\u793a\u65b9\u6cd5\u6709\u4e00\u7dad(\u6578\u7dda)\u3001\u4e8c\u7dad(XY\u5e73\u9762)\u548c\u4e09\u7dad(XYZ\u7acb\u9ad4)\u3002\u7576\u5927\u65bc\u4e09\u7dad\u7684\u8cc7\u6599\u5c31\u96e3\u4ee5\u8996\u89ba\u5316\u5448\u73fe\uff0c\u90a3\u9ebc\u6211\u5011\u8a72\u5982\u4f55\u8868\u793a\u9ad8\u7dad\u5ea6\u7684\u8cc7\u6599\u540c\u6642\u53c8\u4e0d\u80fd\u58d3\u7e2e\u539f\u672c\u8cc7\u6599\u9593\u5f7c\u6b64\u7684\u95dc\u9023\u6027\u5462\uff1f\u9019\u6642\u964d\u7dad\u5c31\u80fd\u5e6b\u52a9\u4f60\u4e86\uff01\u964d\u7dad\u9867\u540d\u601d\u7fa9\uff0c\u5c31\u662f\u539f\u672c\u7684\u8cc7\u6599\u8655\u65bc\u5728\u4e00\u500b\u6bd4\u8f03\u9ad8\u7684\u7dad\u5ea6\u4f5c\u6a19\u4e0a\uff0c\u6211\u5011\u5e0c\u671b\u627e\u5230\u4e00\u500b\u4f4e\u7dad\u5ea6\u7684\u4f5c\u6a19\u4f86\u63cf\u8ff0\u5b83\uff0c\u4f46\u53c8\u4e0d\u80fd\u5931\u53bb\u8cc7\u6599\u672c\u8eab\u7684\u7279\u8cea\u3002","title":"\u964d\u7dad (Dimension 
Reduction)"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#_2","text":"\u60f3\u60f3\u770b\u5982\u679c\u6211\u5011\u80fd\u5920\u628a\u4e00\u4e9b\u8cc7\u6599\u505a\u58d3\u7e2e\uff0c\u540c\u6642\u53c8\u80fd\u5920\u4fdd\u6301\u8cc7\u6599\u539f\u4f86\u7684\u7279\u6027\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u7528\u6bd4\u8f03\u5c11\u7684\u7a7a\u9593\uff0c\u6216\u662f\u8a08\u7b97\u6642\u7528\u6bd4\u8f03\u5c11\u7684\u8cc7\u6e90\u5c31\u53ef\u4ee5\u5f97\u5230\u8ddf\u6c92\u6709\u505a\u8cc7\u6599\u58d3\u7e2e\u4e4b\u524d\u5f97\u5230\u76f8\u4f3c\u7684\u7d50\u679c\u3002\u6b64\u5916\u8cc7\u6599\u964d\u7dad\u53ef\u4ee5\u5e6b\u52a9\u6211\u5011\u9032\u884c\u8cc7\u6599\u8996\u89ba\u5316\uff0c\u4e8c\u7dad\u53ef\u4ee5\u7528\u5e73\u9762\u5716\u8868\u793a\u3001\u4e09\u7dad\u53ef\u4ee5\u7528\u7acb\u9ad4\u5716\u4f5c\u8868\u793a\uff0c\u800c\u5927\u65bc\u4e09\u7dad\u7684\u7a7a\u9593\u96e3\u4ee5\u8996\u89ba\u5316\u505a\u5448\u73fe\u3002","title":"\u70ba\u4ec0\u9ebc\u8981\u964d\u7dad?"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#_3","text":"\u5e38\u898b\u7684\u964d\u7dad\u65b9\u6cd5\u6709\u5169\u7a2e\u5206\u5225\u6709\u7dda\u6027\u65b9\u6cd5\u7684\u4e3b\u6210\u5206\u5206\u6790(PCA)\u4ee5\u53ca\u975e\u7dda\u6027\u7684 t-\u96a8\u6a5f\u9130\u8fd1\u5d4c\u5165\u6cd5(t-SNE)\u3002\u4e0b\u5716\u4f8b\u5b50\u662f\u5c07 28*28 \u5927\u5c0f\u7684\u624b\u5beb\u6578\u5b57\u7167\u7247\uff0c\u5206\u5225\u900f\u904e\u4e0a\u8ff0\u5169\u7a2e\u964d\u7dad\u65b9\u6cd5\u5c07\u4e00\u5f35 784 \u500b\u50cf\u7d20\u7684\u5f71\u50cf\u964d\u6210 2 \u7dad\u4e26\u6295\u5c04\u5728\u5e73\u9762\u5ea7\u6a19\u4e0a\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe PCA \u964d\u70ba\u5f8c\u53ef\u4ee5\u5927\u81f4\u5c07 0~9 \u7684\u624b\u5beb\u6578\u5b57\u7167\u7247\u5728\u5e73\u9762\u4e0a\u5206\u6210\u5341\u7fa4\uff0c\u4e0d\u904e\u5f7c\u6b64\u9593\u7684\u754c\u7dda\u9084\u662f\u5f88\u6a21\u7cca\u3002\u800c\u6211\u5011\u900f\u904e t-SNE 
\u65b9\u6cd5\u964d\u70ba\u5f8c\u53ef\u4ee5\u770b\u5230\u5e73\u9762\u4e0a\u5f88\u6e05\u695a\u7684\u5c07\u9019\u5341\u500b\u6578\u5b57\u5206\u6210\u5341\u7fa4\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u5f97\u77e5\u624b\u5beb\u6578\u5b57\u7684\u5f71\u50cf\u5728\u975e\u7dda\u6027\u7684\u964d\u7dad\u8f49\u63db\u6548\u679c\u662f\u6bd4\u8f03\u597d\u7684\u3002 Principal component analysis (PCA) T-Distributed Stochastic Neighbor Embedding (t-SNE) \u56e0\u70ba t-SNE \u5141\u8a31\u975e\u7dda\u6027\u7684\u8f49\u63db\uff0c\u6b64\u5916 t-SNE \u4f7f\u7528\u4e86\u66f4\u8907\u96dc\u7684\u516c\u5f0f\u4f86\u8868\u9054\u9ad8\u7dad\u8207\u4f4e\u7dad\u4e4b\u9593\u7684\u95dc\u4fc2\u3002\u56e0\u6b64\u5728\u9019\u7a2e 0~9 \u6709\u5341\u500b\u5206\u985e\u7684\u60c5\u6cc1\u4e0b\u53ef\u4ee5\u78ba\u4fdd\u5f7c\u6b64\u9593\u7684\u8ddd\u96e2\u6703\u88ab\u5340\u9694\u8a72\u800c\u4e0d\u6703\u91cd\u758a\u3002","title":"\u964d\u7dad\u6f14\u7b97\u6cd5"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#principal-component-analysis-pca","text":"\u4e3b\u6210\u4efd\u5206\u6790(Principal component analysis, PCA)\u3002\u5176\u4e3b\u8981\u76ee\u7684\u662f\u628a\u9ad8\u7dad\u7684\u9ede\u982d\u5f71\u5230\u4f4e\u7dad\u7684\u7a7a\u9593\u4e0a\uff0c\u4e26\u4e14\u4f4e\u7dad\u5ea6\u7684\u7a7a\u9593\u4fdd\u6709\u9ad8\u7dad\u7a7a\u9593\u4e2d\u5927\u90e8\u5206\u7684\u6027\u8cea\u3002\u900f\u904e\u5c07\u4e00\u500b\u5177\u6709 n \u500b\u7279\u5fb5\u7a7a\u9593\u7684\u6a23\u672c\uff0c\u8f49\u63db\u70ba\u5177\u6709 k \u500b\u7279\u5fb5\u7a7a\u9593\u7684\u6a23\u672c\uff0c\u5176\u4e2d k \u5fc5\u5b9a\u8981\u5c0f\u65bc n\u3002\u6b64\u5916 PCA \u53ea\u5141\u8a31\u7dda\u6027\u7684\u8f49\u63db\u3002\u5982\u4e0b\u5716\u6240\u793a\uff0c\u6211\u5011\u5c07\u6350\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u9032\u884c PCA 
\u964d\u7dad\u3002\u5c07\u539f\u6709\u56db\u500b\u7279\u5fb5\u5206\u5225\u6709\u82b1\u74e3\u8207\u82b1\u843c\u7684\u9577\u8207\u5bec\uff0c\u900f\u904e\u7dda\u6027\u8f49\u63db\u6210\u5169\u7dad\u4e26\u6295\u5c04\u5728\u5e73\u9762\u4e0a\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u4e09\u7a2e\u82b1\u7684\u985e\u5225\u5728\u5e73\u9762\u4e0a\u5404\u81ea\u90fd\u6709\u7dda\u6027\u7684\u8da8\u52e2\uff0c\u4e5f\u5c31\u662f\u5716\u4e2d\u7d05\u8272\u7684\u7dda\u689d\u3002","title":"Principal component analysis (PCA)"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#pca","text":"\u9996\u5148\u4e00\u958b\u59cb\u5148\u6c42\u51fa\u6240\u6709\u8cc7\u6599\u9ede\u4e2d\u5fc3 \u00b5\uff0c\u4e5f\u5c31\u662f\u5c07\u6bcf\u4e00\u500b\u8cc7\u6599\u9ede\u7684\u5e73\u5747\u3002\u63a5\u8457\u5c07\u6bcf\u4e00\u500b\u8cc7\u6599\u9ede\u6e1b\u53bb \u00b5\uff0c\u4e5f\u5c31\u662f\u505a\u8cc7\u6599\u9ede\u7684\u5e73\u79fb\uff0c\u5e73\u79fb\u5f8c\u539f\u9ede\u662f\u6240\u6709\u9ede\u7684\u4e2d\u5fc3\u3002\u7b2c\u4e09\u6b65\u8a08\u7b97\u7279\u5fb5\u5354\u65b9\u5dee\u77e9\u9663\uff0c\u5176\u4e2d\u77e9\u9663\u5c0d\u89d2\u7dda\u4e0a\u5206\u5225\u662f\u6bcf\u500b\u7279\u5fb5\u7684\u65b9\u5dee\uff0c\u800c\u975e\u5c0d\u89d2\u7dda\u4e0a\u7684\u6578\u503c\u662f\u4e0d\u540c\u7279\u5fb5\u9593\u5f7c\u6b64\u7684\u5354\u65b9\u5dee\u3002\u5354\u65b9\u5dee\u662f\u8861\u91cf\u5169\u500b\u8b8a\u6578\u540c\u6642\u8b8a\u5316\u7684\u8b8a\u5316\u7a0b\u5ea6\uff0c\u5354\u65b9\u5dee\u7d55\u5c0d\u503c\u8d8a\u5927\u5169\u8005\u5c0d\u5f7c\u6b64\u7684\u5f71\u97ff\u8d8a\u5927\u3002\u7b2c\u56db\u6b65\u9a5f\u5c0d\u77e9\u9663\u9032\u884c\u7279\u5fb5\u503c\u5206\u89e3\uff0c\u8a08\u7b97\u5354\u65b9\u5dee\u77e9\u9663\u7684\u7279\u5fb5\u5411\u91cf\u548c\u7279\u5fb5\u503c\u4e26\u9078\u53d6\u7279\u5fb5\u5411\u91cf\u3002\u7b2c\u4e94\u6b65\u9a5f\u5c07\u7279\u5fb5\u503c\u7531\u5c0f\u5230\u5927\u6392\u5e8f\uff0c\u4e26\u9078\u53d6\u5176\u4e2d\u6700\u5927\u7684 k \u500b\u7279\u5fb5\u3002\u7136\u5f8c\u5c07\u9019\u4e9b k 
\u500b\u7279\u5fb5\u5411\u91cf\u4f5c\u70ba\u7279\u5fb5\u5411\u91cf\u77e9\u9663\u3002\u6700\u5f8c\u5c0d\u8cc7\u6599\u96c6\u4e2d\u7684\u6bcf\u4e00\u500b\u7279\u5fb5\u8f49\u63db\u70ba\u65b0\u7684\u7279\u5fb5\u3002 \u5148\u6c42\u51fa\u6240\u6709\u8cc7\u6599\u9ede\u4e2d\u5fc3 \u00b5 \u5c07\u6bcf\u4e00\u500b\u8cc7\u6599\u9ede\u6e1b\u53bb \u00b5 \u8a08\u7b97\u7279\u5fb5\u7684\u5354\u65b9\u5dee\u77e9\u9663 \u5c0d\u77e9\u9663\u9032\u884c\u7279\u5fb5\u503c\u5206\u89e3 \u53d6\u51fa\u6700\u5927\u7684 k \u500b\u7279\u5fb5\u503c\u5c0d\u61c9\u7684\u7279\u5fb5\u5411\u91cf \u5c07\u8cc7\u6599\u9ede\u6295\u5f71\u5230\u9078\u53d6\u7684\u7279\u5fb5\u5411\u91cf\u4e0a","title":"PCA\u7684\u4e3b\u8981\u6b65\u9a5f"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#t-distributed-stochastic-neighbor-embedding-t-sne","text":"t-SNE \u76ee\u6a19\u8ddf PCA \u662f\u4e00\u6a23\u7684\uff0c\u5b83\u5011\u90fd\u5e0c\u671b\u628a\u9ad8\u7dad\u7684\u8cc7\u6599\u6295\u5f71\u5230\u4f4e\u7dad\u4e2d\uff0c\u4e26\u4e14\u4fdd\u7559\u9ad8\u7dad\u4e2d\u7684\u9ede\u8207\u9ede\u4e4b\u9593\u7684\u95dc\u4fc2\u8207\u7279\u6027\u3002\u5169\u8005\u4e0d\u540c\u7684\u9ede\u5728\u65bc t-SNE \u5141\u8a31\u975e\u7dda\u6027\u7684\u8f49\u63db\u3002\u56e0\u70ba t-SNE \u4f7f\u7528\u4e86\u66f4\u8907\u96dc\u7684\u516c\u5f0f\u4f86\u8868\u9054\u9ad8\u7dad\u8207\u4f4e\u7dad\u4e4b\u9593\u7684\u95dc\u4fc2\u3002\u4e3b\u8981\u662f\u5c07\u9ad8\u7dad\u7684\u6578\u64da\u7528\u9ad8\u65af\u5206\u4f48\u7684\u6a5f\u7387\u5bc6\u5ea6\u51fd\u6578\u8fd1\u4f3c\uff0c\u800c\u4f4e\u7dad\u6578\u64da\u7684\u90e8\u5206\u4f7f\u7528 t \u5206\u4f48\u7684\u65b9\u5f0f\u4f86\u8fd1\u4f3c\u3002","title":"T-Distributed Stochastic Neighbor Embedding 
(t-SNE)"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#pca-t-sne","text":"PCA\u548ct-SNE\u662f\u5169\u500b\u4e0d\u540c\u964d\u7dad\u7684\u65b9\u6cd5\uff0cPCA\u7684\u512a\u9ede\u5728\u65bc\u7c21\u55ae\u82e5\u65b0\u7684\u9ede\u8981\u6620\u5c04\u6642\u76f4\u63a5\u4ee3\u5165\u516c\u5f0f\u5373\u53ef\u5f97\u51fa\u964d\u7dad\u5f8c\u7684\u9ede\u3002\u82e5t-SNE\u6709\u65b0\u7684\u9ede\u8fd1\u4f86\u6642\u6211\u5011\u6c92\u6709\u53bb\u8a08\u7b97\u65b0\u7684\u9ede\u548c\u820a\u7684\u9ede\u4e4b\u9593\u7684\u95dc\u4fc2\u56e0\u6b64 \u6211\u5011\u7121\u6cd5\u5c07\u65b0\u7684\u9ede\u6295\u5f71\u4e0b\u53bb\u3002t-SNE\u7684\u512a\u9ede\u662f\u53ef\u4ee5\u4fdd\u7559\u539f\u672c\u9ad8\u7dad\u8ddd\u96e2\u8f03\u9060\u7684\u9ede\u964d\u7dad\u5f8c\u4f9d\u7136\u4fdd\u6301 \u9060\u7684\u8ddd\u96e2\uff0c\u56e0\u6b64\u9019\u4e9b\u7fa4\u964d\u7dad\u5f8c\u4f9d\u7136\u4fdd\u6301\u7fa4\u7684\u7279\u6027\u3002 PCA\u5141\u8a31\u7dda\u6027\u7684\u8f49\u63db t-SNE\u5141\u8a31\u975e\u7dda\u6027\u7684\u8f49\u63db","title":"PCA & t-SNE \u6574\u7406"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#_4","text":"","title":"[\u7a0b\u5f0f\u5be6\u4f5c]"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#pca_1","text":"from sklearn.decomposition import PCA pca = PCA ( n_components = 2 , iterated_power = 1 ) train_reduced = pca . fit_transform ( X_train ) print ( 'PCA\u65b9\u5dee\u6bd4: ' , pca . explained_variance_ratio_ ) print ( 'PCA\u65b9\u5dee\u503c:' , pca . explained_variance_ )","title":"PCA"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#t-sne","text":"from sklearn.manifold import TSNE tsneModel = TSNE ( n_components = 2 , random_state = 42 , n_iter = 1000 ) train_reduced = tsneModel . 
fit_transform ( X_train ) t-SNE \u4e0d\u9069\u7528\u65bc\u65b0\u8cc7\u6599\u3002PCA \u964d\u7dad\u53ef\u4ee5\u9069\u7528\u65b0\u8cc7\u6599\uff0c\u53ef\u547c\u53ebtransform() \u51fd\u5f0f\u5373\u53ef\u3002\u800c t-SNE \u5247\u4e0d\u884c\u3002\u56e0\u70ba\u6f14\u7b97\u6cd5\u7684\u95dc\u4fc2\u5728 scikit-learn \u5957\u4ef6\u4e2d\u7684 t-SNE \u6f14\u7b97\u6cd5\u4e26\u6c92\u6709transform() \u51fd\u5f0f\u53ef\u4ee5\u547c\u53eb\u3002","title":"t-SNE"},{"location":"7.\u975e\u76e3\u7763\u5f0f\u5b78\u7fd2-\u964d\u7dad/#reference","text":"\u6df1\u5165\u5b78\u7fd2\u4e3b\u6210\u5206\u5206\u6790\uff08PCA\uff09\u6f14\u7b97\u6cd5\u539f\u7406\u53ca\u5176Python\u5be6\u73fe \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"Reference"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/","text":"[Day 8] \u7dda\u6027\u8ff4\u6b78 (Linear Regression) ## \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 - \u8a8d\u8b58\u7dda\u6027\u8ff4\u6b78 - \u900f\u904e\u6a5f\u5668\u5b78\u7fd2\u4f86\u627e\u51fa\u4e00\u689d\u51fd\u5f0f\uff0c\u4f86\u6700\u4f73\u5316\u6a21\u578b - \u5169\u7a2e\u6c42\u89e3\u65b9\u6cd5 - \u7dda\u6027\u8ff4\u6b78\u7a0b\u5f0f\u624b\u628a\u624b - \u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u3001\u591a\u5143\u8ff4\u6b78\u3001\u975e\u7dda\u6027\u8ff4\u6b78 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u8a8d\u8b58\u7dda\u6027\u8ff4\u6b78 \u7dda\u6027\u8ff4\u6b78\u662f\u7d71\u8a08\u4e0a\u5728\u627e\u591a\u500b\u81ea\u8b8a\u6578\u548c\u4f9d\u8b8a\u6578\u4e4b\u9593\u7684\u95dc\u4fc2\u6240\u5efa\u51fa\u4f86\u7684\u6a21\u578b\u3002\u53ea\u6709\u4e00\u500b\u81ea\u8b8a\u6578(x)\u548c\u4e00\u500b\u4f9d\u8b8a\u6578(y)\u7684\u60c5\u5f62\u7a31\u70ba\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u5927\u65bc\u4e00\u500b\u81ea\u8b8a\u6578(x 1 ,x 2 ,...)\u7684\u60c5\u5f62\u7a31\u70ba\u591a\u5143\u8ff4\u6b78\u3002 \u4e00\u500b\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78: y=ax+b\uff0c\u5176\u4e2d b\uff1a\u622a\u8ddd(Intercept)\uff0ca\uff1a\u659c\u7387(Slope) \u70ba 
x \u8b8a\u52d5\u4e00\u500b\u55ae\u4f4d y \u8b8a\u52d5\u7684\u91cf\uff0c\u5982\u4e0b\u5716: \u8ff4\u6b78\u5206\u6790\u7684\u76ee\u6a19\u51fd\u6578\u6216\u7a31\u640d\u5931\u51fd\u6578(loss function)\u5c31\u662f\u5e0c\u671b\u627e\u5230\u7684\u6a21\u578b\u6700\u7d42\u7684\u6b98\u5dee\u8d8a\u5c0f\u8d8a\u597d\uff0c\u4f86\u627e\u53c3\u6578 a \u548c b\u3002 \u5169\u7a2e\u6c42\u89e3\u65b9\u6cd5 \u7dda\u6027\u6a21\u578b\u6700\u5e38\u898b\u7684\u89e3\u6cd5\u6709\u5169\u7a2e\uff0c\u5206\u5225\u70ba Closed-form (\u9589\u5f0f\u89e3) \u8207\u68af\u5ea6\u4e0b\u964d (Gradient descent)\u3002\u7576\u7279\u5fb5\u5c11\u6642\u4f7f\u7528 Closed-form \u8f03\u70ba\u9069\u5408\uff0c\u4f7f\u7528\u4e0b\u9762\u516c\u5f0f\u4f86\u6c42\u51fa \u03b8 \u503c\u3002\u6211\u5011\u53c8\u53ef\u4ee5\u8aaa\u7dda\u6027\u6a21\u578b\u7684\u6700\u5c0f\u5e73\u65b9\u6cd5\u7684\u89e3\u5373\u70ba Closed-form\u3002\u82e5\u7576\u662f\u8907\u96dc\u7684\u554f\u984c\u6642 Gradient descen \u8f03\u80fd\u89e3\u6c7a\uff0c\u5176\u539f\u56e0\u662f\u5927\u90e8\u5206\u7684\u554f\u984c\u5176\u5be6\u662f\u6c92\u6709\u516c\u5f0f\u89e3\u7684\u3002\u6211\u5011\u53ea\u80fd\u6c42\u51fa\u4e00\u500b\u51fd\u6578 f(x) \u4f7f\u5176\u8aa4\u5dee\u6700\u5c0f\u8d8a\u597d\u3002 Closed-form Gradient descent ## Least Square Method (\u6700\u5c0f\u5e73\u65b9\u6cd5) \u5047\u8a2d\u4e00\u500b\u5730\u5340\u7684\u623f\u50f9\u8207\u576a\u6578\u662f\u5448\u7dda\u6027\u95dc\u4fc2\uff0c\u4e26\u4ee5\u4e0b\u5716\u4e2d\u7684\u4e09\u500b\u9ede\u8868\u793a\u3002\u5982\u679c\u6211\u5011\u60f3\u900f\u904e\u623f\u5b50\u7684\u576a\u6578\u4f86\u9810\u6e2c\u623f\u50f9\uff0c\u90a3\u9ebc\u6211\u5011\u7684\u76ee\u6a19\u5c31\u662f\u627e\u5230\u4e00\u689d\u76f4\u7dda\uff0c\u4e26\u8207\u5ea7\u6a19\u5e73\u9762\u4e0a\u9019\u4e09\u500b\u9ede\u7684\u5dee\u8ddd\u8d8a\u5c0f\u8d8a\u597d\u3002\u90a3\u9019\u689d\u76f4\u7dda\u8a72\u600e\u9ebc\u627e\u5462\uff1f\u9996\u5148\u6211\u5011\u96a8\u6a5f\u627e\u4e00\u689d\u76f4\u7dda\uff0c\u4e26\u8a08\u7b97\u9019\u4e09\u9ede\u7684 
loss\u3002\u640d\u5931\u51fd\u6578\u53ef\u4ee5\u81ea\u5df1\u5b9a\u7fa9\uff0c\u5047\u8a2d\u6211\u5011\u4f7f\u7528 MSE \u5747\u65b9\u8aa4\u5dee\u4f86\u8a08\u7b97\u3002\u900f\u904e\u4e00\u7cfb\u5217\u8a08\u7b97\u6211\u5011\u5f97\u5230\u4e00\u500b loss \u5373\u70ba MSE \u503c\u3002\u63a5\u8457\u6211\u5011\u5c07\u9019\u500b\u76f4\u7dda\u7a0d\u7a0d\u7684\u8f49\u4e00\u500b\u89d2\u5ea6\u5f8c\u53c8\u53ef\u4ee5\u8a08\u7b97\u4e00\u500b\u65b0\u7684 MSE\uff0c\u6b64\u523b\u6211\u5011\u53ef\u4ee5\u767c\u73fe MSE \u503c\u53c8\u6bd4\u525b\u525b\u66f4\u5c0f\u4e86\u3002\u4e5f\u5c31\u662f\u8aaa\u9019\u4e00\u689d\u65b0\u7684\u76f4\u7dda\u80fd\u5920\u66f4\u6cd5\u61c9\u51fa\u8a13\u7df4\u96c6\u4e2d A\u3001B\u3001C \u7684\u6578\u64da\u9ede\u6240\u53cd\u6620\u7684\u623f\u5c4b\u576a\u6578\u8207\u623f\u50f9\u4e4b\u9593\u7684\u7dda\u6027\u95dc\u4fc2\u3002 \u7c21\u55ae\u4f86\u8aaa\u6211\u5011\u5728\u4e00\u500b\u4e8c\u7dad\u7a7a\u9593\u4e2d\uff0c\u6211\u5011\u53ef\u4ee5\u627e\u5230\u7121\u6578\u689d\u76f4\u7dda\u3002\u73fe\u5728\u6211\u5011\u80fd\u505a\u7684\u4e8b\u60c5\u5c31\u662f\u5f9e\u9019\u7121\u6578\u689d\u76f4\u7dda\u4e2d\u9078\u51fa\u4e00\u689d\u6700\u4f73\u7684\u7576\u4f5c\u6211\u5011\u7684\u9810\u6e2c\u6a21\u578b\uff0c\u540c\u6642\u5b83\u9762\u5c0d\u9019\u4e09\u9ede\u7684\u8aa4\u5dee\u662f\u8981\u6700\u5c0f\u7684\u3002\u56e0\u6b64\u6211\u5011\u7684\u76ee\u6a19\u5c31\u662f\u8981\u6700\u5c0f\u5316 MSE \u4e5f\u5c31\u662f\u6240\u8b02\u7684\u640d\u5931\u51fd\u6578 (loss function)\u3002\u6240\u4ee5\u6574\u500b\u7dda\u6027\u8ff4\u6b78\u7684\u76ee\u6a19\u5c31\u662f\u6700\u5c0f\u5316\u6211\u5011\u7684\u640d\u5931\u51fd\u6578\uff0c\u5176\u4e2d\u4e00\u500b\u89e3\u6cd5\u5c31\u662f\u6700\u5c0f\u5e73\u65b9\u6cd5\u3002\u56e0\u70ba MSE \u7b49\u65bc 1/n \u500d\u7684\u6b8b\u5dee\u5e73\u65b9\u548c (RSS)\uff0c\u5176\u4e2d\u5206\u6bcd n 
\u70ba\u5e38\u6578\uff0c\u4e0d\u5f71\u97ff\u6975\u5c0f\u5316\u6545\u62ff\u6389\u3002\u56e0\u6b64\u6700\u7d42\u7684\u6c42\u89e3\u662f\u6eff\u8db3\u6700\u5c0f\u5316\u5e73\u65b9\u548c\uff0c\u4f7f\u5176\u6700\u5c0f\u5316\u3002\u7d93\u904e\u6578\u5b78\u63a8\u5c0e\u5f8c\uff0c\u7c21\u5316\u7684\u516c\u5f0f\u5982\u4e0b\uff1a \u5c0f\u8a66\u8eab\u624b \u57fa\u65bc\u4e0a\u9762\u7684\u516c\u5f0f\u6211\u5011\u60f3\u627e\u51fa\u4e00\u7d44\u53c3\u6578\u6b0a\u91cd \u03b8\u3002\u4e5f\u5c31\u662f\u4e0b\u5716\u554f\u984c\u4e2d\u7684 a (\u03b8 0 )\u3001b (\u03b8 1 ) \u5169\u53c3\u6578\uff0c\u4f7f\u5f97\u5e73\u9762\u4e0a\u9019\u4e09\u9ede\u5e73\u65b9\u548c\u6709\u6975\u5c0f\u503c\u3002\u9019\u500b\u51fd\u5f0f\u5c0d \u03b8 0 , \u03b8 1 \u504f\u505a\u5fae\u5206\u8a2d\u4ed6\u5011\u70ba0\uff0c\u63a5\u8457\u6211\u5011\u5c0d\u65b9\u7a0b\u5f0f\u6c42\u89e3\u3002 \u6b64\u51fd\u5f0f\u53ea\u6709\u6975\u5c0f\u503c\uff0c\u56e0\u6b64\u6211\u5011\u5f97\u5230\u7684 \u03b8 0 , \u03b8 1 \u6700\u5c0f\u6975\u503c\u7684\u89e3\u3002 \u7bc4\u4f8b\u7a0b\u5f0f (\u623f\u50f9\u9810\u6e2c) \u624b\u523b\u7dda\u6027\u8ff4\u6b78 \u6211\u5011\u900f\u904e Sklearn \u6240\u63d0\u4f9b\u7684\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6\u9032\u884c\u7dda\u6027\u8ff4\u6b78\u6a21\u578b\u5efa\u6a21\uff0c\u4e26\u63a1\u7528\u6700\u5c0f\u5e73\u6cd5\u3002\u9996\u5148\u70ba\u4e86\u8981\u9a57\u8b49\u6211\u5011\u4e0a\u9762\u7684\u516c\u5f0f\uff0c\u56e0\u6b64\u6211\u5011\u5148\u5229\u7528 Numpy \u5957\u4ef6\u81ea\u5df1\u624b\u523b\u505a\u4e00\u7cfb\u5217\u7684\u77e9\u9663\u904b\u7b97\u6c42\u51fa\u6bcf\u4e00\u9805\u7684\u4fc2\u6578\u8207\u622a\u8ddd\u3002 import numpy as np import pandas as pd from sklearn.metrics import mean_squared_error from sklearn.datasets import load_boston # \u8f09\u5165 Sklearn \u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6 13\u500b\u8f38\u5165\u7279\u5fb5 1\u500b\u8f38\u51fa\u7279\u5fb5 boston_dataset = load_boston () # \u8f38\u5165\u7279\u5fb5\u517113\u500b X = boston_dataset . 
data # \u8a2d\u5b9a\u622a\u8ddd\u9805 b \u6b0a\u91cd\u503c\u70ba 1 b = np . ones (( X . shape [ 0 ], 1 )) # \u6dfb\u52a0\u5e38\u6578\u9805\u7279\u5fb5\uff0c\u6700\u7d42\u6709 13+1 \u500b\u8f38\u5165\u7279\u5fb5 X = np . hstack (( X , b )) # \u8f38\u51fa(\u623f\u50f9) y = boston_dataset . target # \u8a08\u7b97 Beta (@ \u70ba numpy \u4e2d 2-D arrays \u7684\u77e9\u9663\u4e58\u6cd5) Beta = np . linalg . inv ( X . T @ X ) @ X . T @ y y_pred = X @ Beta # MSE: 21.8948311817292 print ( 'MSE:' , mean_squared_error ( y_pred , y )) \u8a08\u7b97\u51fa\u4f86 Beta \u5f8c\u6211\u5011\u518d\u628a\u6240\u6709\u7684 X \u5e36\u5165\u4e26\u505a\u8a08\u7b97\uff0c\u7b97\u51fa\u4f86\u7684\u7d50\u679c MSE \u70ba 21.89\u3002\u6700\u5f8c\u6211\u5011\u53ef\u4ee5\u8a66\u8457\u628a Beta \u8b8a\u6578\u5217\u5370\u51fa\u4f86\u3002\u7e3d\u5171\u6703\u6709 14 \u500b\u53c3\u6578\uff0c\u7531 13 \u500b\u8f38\u5165\u7279\u5fb5\u4fc2\u6578\u8207\u6700\u5f8c\u4e00\u9805\u622a\u8ddd\u6240\u7d44\u6210\u7684\u3002 \u8f38\u51fa\u7d50\u679c\uff1a array([-1.08011358e-01, 4.64204584e-02, 2.05586264e-02, 2.68673382e+00, -1.77666112e+01, 3.80986521e+00, 6.92224640e-04, -1.47556685e+00, 3.06049479e-01, -1.23345939e-02, -9.52747232e-01, 9.31168327e-03, -5.24758378e-01, 3.64594884e+01]) \u4f7f\u7528 Sklearn LinearRegression \u7dda\u6027\u8ff4\u6b78\u7c21\u55ae\u4f86\u8aaa\uff0c\u5c31\u662f\u5c07\u8907\u96dc\u7684\u8cc7\u6599\u6578\u64da\uff0c\u64ec\u548c\u81f3\u4e00\u689d\u76f4\u7dda\u4e0a\uff0c\u5c31\u80fd\u65b9\u4fbf\u9810\u6e2c\u672a\u4f86\u7684\u8cc7\u6599\u3002\u63a5\u4e0b\u4f86\u6211\u5011\u4e00\u6a23\u4f7f\u7528\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6\uff0c\u4e26\u4f7f\u7528 Sklearn \u63d0\u4f9b\u7684 LinearRegression \u4f86\u6c42\u89e3\u3002 Parameters: - fit_intercept: \u662f\u5426\u6709\u622a\u8ddd\uff0c\u5982\u679c\u6c92\u6709\u5247\u76f4\u7dda\u904e\u539f\u9ede\u3002 Attributes: - coef_: \u53d6\u5f97\u4fc2\u6578\u3002 - intercept_: \u53d6\u5f97\u622a\u8ddd\u3002 Methods: - fit: 
\u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: R2 score \u6a21\u578b\u8a55\u4f30\u3002 import numpy as np import pandas as pd from sklearn.linear_model import LinearRegression from sklearn.metrics import mean_squared_error from sklearn.datasets import load_boston # \u8f09\u5165 Sklearn \u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6 13\u500b\u8f38\u5165\u7279\u5fb5 1\u500b\u8f38\u51fa\u7279\u5fb5 boston_dataset = load_boston () # \u8f38\u5165\u7279\u5fb5\u517113\u500b X = boston_dataset . data # \u8f38\u51fa(\u623f\u50f9) y = boston_dataset . target # \u8a13\u7df4\u6a21\u578b linearModel = LinearRegression () linearModel . fit ( X , y ) y_pred = linearModel . predict ( X ) # 21.894831181729202 print ( 'MSE:' , mean_squared_error ( y_pred , y )) Sklearn \u7684 LinearRegression \u6a21\u578b\u4e5f\u662f\u63a1\u7528\u5c0f\u5e73\u65b9\u6cd5\u6c42\u89e3\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u5176 MSE \u8207\u7a0d\u65e9\u624b\u523b\u7684\u65b9\u6cd5\u76f8\u7576\u5f88\u63a5\u8fd1\u3002\u53e6\u5916 Sklearn \u6a21\u578b\u540c\u6642\u4e5f\u63d0\u4f9b\u4e86 coef_ \u548c intercept_ \u5169\u500b\u5c6c\u6027\u53ef\u4ee5\u53d6\u5f97\u6a21\u578b\u7684\u7279\u5fb5\u4fc2\u6578\u8207\u622a\u8ddd\u3002 \u591a\u9805\u5f0f\u7684\u8ff4\u6b78\u6a21\u578b \u5c0d\u65bc\u7dda\u6027\u8ff4\u6b78\u4f86\u8aaa\uff0c\u8cc7\u6599\u90fd\u662f\u5f88\u5747\u52fb\u5730\u5206\u5e03\u5728\u4e00\u689d\u76f4\u7dda\u4e0a\uff0c\u4f46\u73fe\u5be6\u7684\u8cc7\u6599\u5f80\u5f80\u662f\u975e\u7dda\u6027\u7684\u5206\u4f48\u3002\u5982\u679c\u6211\u5011\u4e00\u6a23\u4f7f\u7528\u4e0a\u8ff0\u65b9\u6cd5\u53d6\u5f97\u7dda\u6027\u6a21\u578b\uff0c\u5728\u5be6\u969b\u5834\u57df\u4e0a\u9810\u6e2c\u6548\u679c\u53ef\u80fd\u4e26\u4e0d\u5927\u3002 
\u591a\u9805\u5f0f\u8ff4\u6b78\u4e2d\uff0c\u6578\u64da\u4e0d\u592a\u5177\u6709\u7dda\u6027\u95dc\u4fc2\uff0c\u56e0\u6b64\u61c9\u5c0b\u627e\u4e00\u4e9b\u975e\u7dda\u6027\u66f2\u7dda\u53bb\u64ec\u5408\u3002\u5c0d\u65bc\u4ee5\u4e0a\u7684\u6578\u64da\uff0c\u539f\u672c\u662f\u53ea\u6709\u4e00\u500b x \u7279\u5fb5\uff0c\u4f46\u662f\u6211\u5011\u53ef\u4ee5\u5efa\u69cb\u8a31\u591a\u65b0\u7684\u7279\u5fb5\u3002\u5982\u4e0b\u5716\uff0c\u7528\u4e00\u689d\u4e09\u6b21\u66f2\u7dda\u53bb\u64ec\u5408\u6578\u64da\u6548\u679c\u66f4\u597d\u3002\u6211\u5011\u5c07\u4e09\u6b21\u51fd\u6578\u770b\u6210 ax 3 +bx 2 +cx+d\u3002\u9019\u6a23\u5c31\u53c8\u8b8a\u6210\u89e3\u591a\u5143\uff0c\u5176\u6211\u5011\u5c31\u662f\u8981\u627e\u51fa a\u3001b\u3001c\u3001d \u4f7f\u5176\u640d\u5931\u51fd\u6578\u6700\u5c0f\u3002 \u7dda\u6027\u6a21\u578b\u7684\u64f4\u5c55 \u5f9e\u4e0a\u8ff0\u554f\u984c\u4e2d\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7dda\u6027\u8ff4\u6b78\u5728\u5be6\u52d9\u4e0a\u6240\u9762\u81e8\u7684\u554f\u984c\u3002\u9996\u5148\u6211\u5011\u4f86\u8ff4\u9867\u4e00\u4e0b\u7a0d\u65e9\u6240\u63d0\u5230\u7684\u7dda\u6027\u65b9\u7a0b\u5f0f\uff0c\u9019\u7d44\u7dda\u6027\u65b9\u7a0b\u5f0f\u8aaa\u660e\u4e86\u6bcf\u500b\u7279\u5fb5 x \u4e00\u6b21\u65b9\u8207\u76ee\u6a19\u503c\u662f\u6709\u4e00\u500b\u7dda\u6027\u7684\u95dc\u4fc2\u3002 y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 2 + ... 
+ \u03b2 n x n \u63a5\u8457\u6211\u5011\u518d\u4f86\u770b\u4e00\u4e0b\u53e6\u4e00\u500b\u4f8b\u5b50\uff0c\u6bd4\u5982\u8aaa\u7279\u5fb5 x 1 \u8207\u76ee\u6a19\u503c\u5b58\u5728\u8457\u4ee5\u4e0b\u7684\u95dc\u4fc2\u3002\u6211\u5011\u767c\u73fe\u9019\u7d44\u65b9\u7a0b\u5f0f\u5df2\u7d93\u4e0d\u662f\u4e00\u500b\u7dda\u6027\u95dc\u4fc2\u4e86\uff0c\u56e0\u70ba\u4ed6\u6709\u4e86 x 1 \u7684\u4e8c\u6b21\u65b9\u3002 y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 1 2 \u90a3\u9ebc\u8a72\u600e\u9ebc\u505a\u6211\u5011\u624d\u80fd\u53c8\u628a\u5b83\u8f49\u63db\u6210\u7dda\u6027\u95dc\u4fc2\u5462\uff1f\u9019\u6642\u5019\u6211\u5011\u5c31\u53ef\u4ee5\u7528\u4e00\u500b\u65b0\u7684\u7279\u5fb5 x 2 \u3002\u6211\u5011\u8b93 x 2 \u7b49\u65bc x 1 \u7684\u5e73\u65b9\uff0c\u9019\u6a23\u6211\u5011\u518d\u628a x 2 \u5e36\u8ff4\u539f\u65b9\u7a0b\u5f0f\u4e2d\u3002\u6b64\u6642\u9019\u5169\u500b\u7279\u5fb5 x 1 \u8207 x 2 \u8207\u76ee\u6a19\u503c\u53c8\u8ff4\u5230\u4e86\u7dda\u6027\u95dc\u4fc2\u3002 Let x 2 = x 1 2 => y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 2 \u540c\u6a23\u7684\u6211\u5011\u518d\u4f86\u770b\u53e6\u4e00\u500b\u4f8b\u5b50\u3002\u6211\u5011\u5982\u679c\u5f15\u5165\u4e86 x 1 \u7684\u4e09\u6b21\u65b9\u7684\u8a71\uff0c\u4ed6\u7684\u65b9\u7a0b\u5f0f\u5982\u4e0b\uff1a y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 1 2 + \u03b2 3 x 1 3 \u540c\u7406\u6211\u5011\u9019\u6642\u4e00\u6a23\u53ef\u4ee5\u5f15\u5165\u65b0\u7684\u7279\u5fb5 x 2 \u7b49\u65bc x 1 \u7684\u4e8c\u6b21\u65b9\uff0c\u4ee5\u53ca x 3 \u7b49\u65bc x 1 \u7684\u4e09\u6b21\u65b9\u3002\u9019\u6a23\u7d93\u904e\u4e00\u500b\u8f49\u63db\u4ee5\u5f8c\u6211\u5011\u7684 y \u503c\u8207\u6240\u6709\u7684\u7279\u5fb5\u9593\u4f9d\u7136\u5b58\u5728\u8457\u7dda\u6027\u95dc\u4fc2\u3002 Let x 2 = x 1 2 and x 3 = x 1 3 => y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 2 + \u03b2 3 x 3 \u9019\u88e1\u505a\u4e00\u500b\u5c0f\u7d50\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e\u5f15\u5165\u8f49\u8b8a\u904e\u5f8c\u7684 x 
\u4f5c\u70ba\u4e00\u500b\u65b0\u7684\u7279\u5fb5\u4f86\u6eff\u8db3\u7dda\u6027\u5047\u8a2d\u3002\u6b64\u6642\u7684\u8ff4\u6b78\u65b9\u7a0b\u5f0f\u5c31\u662f\u4e00\u500b\u591a\u9805\u5f0f\u8ff4\u6b78(polynomial regression)\u3002 Sklearn \u5be6\u4f5c\u591a\u9805\u5f0f\u8ff4\u6b78 \u7531\u65bc Sklearn \u6c92\u6709\u5c01\u88dd\u597d\u7684\u591a\u9805\u5f0f\u8ff4\u6b78\u6a21\u578b\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb\u3002\u4e0d\u904e\u6211\u5011\u53ef\u4ee5\u900f\u904e make_pipeline \u5c07 PolynomialFeatures \u8207 LinearRegression \u5c01\u88dd\u6210\u4e00\u500b\u591a\u9805\u5f0f\u8ff4\u6b78\u6a21\u578b\uff0c\u4e26\u4e14\u4f7f\u7528\u8005\u53ef\u4ee5\u96a8\u610f\u8a2d\u5b9a degree(\u6b21\u65b9)\u503c\u3002 \u6211\u5011\u53ef\u4ee5\u5c0d\u539f\u672c\u7684\u7279\u5fb5\u9032\u884c PolynomialFeatures \u69cb\u9020\u65b0\u6a23\u672c\u7279\u5fb5\u63a1\u3002\u4e26\u5c07\u8f49\u63db\u5f8c\u7684\u7279\u5fb5\u9001\u5230\u7dda\u6027\u8ff4\u6b78\u6a21\u578b\u9032\u884c\u64ec\u5408\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u81ea\u5b9a\u7fa9\u4e00\u500b PolynomialRegression() \u7684\u51fd\u5f0f\uff0c\u4f7f\u7528\u8005\u53ef\u4ee5\u8f38\u5165 degree \u5927\u5c0f\u63a7\u5236\u6a21\u578b\u7684\u5f37\u5ea6\u3002\u5728\u9019\u500b\u51fd\u5f0f\u4e2d\u6211\u5011\u4f7f\u7528 Sklearn \u7684 pipeline \u65b9\u6cd5\u5c07 PolynomialFeatures \u7279\u5fb5\u8f49\u63db\u8207 LinearRegression \u7dda\u6027\u8ff4\u6b78\u6a21\u578b\u5c01\u88dd\u8d77\u4f86\u3002\u53e6\u5916\u4ee5\u4e0b\u7bc4\u4f8b\u662f\u900f\u904e\u81ea\u8a02\u7fa9\u7684 make_data() \u51fd\u5f0f\u7522\u751f\u4e00\u7d44\u96a8\u6a5f\u7684 x \u548c y\u3002\u8a72\u51fd\u5f0f\u4e2d\u53ef\u4ee5\u8a2d\u5b9a\u96a8\u6a5f\u8cc7\u6599\u7684\u6bd4\u6578\uff0c\u4e0b\u9762\u7a0b\u5f0f\u4e2d\u6211\u5011\u5148\u96a8\u6a5f\u5efa\u7acb 100 \u7b46\u6578\u64da\u3002 from sklearn.linear_model import LinearRegression from sklearn.preprocessing import PolynomialFeatures from sklearn.pipeline import make_pipeline plt . style . 
use ( 'seaborn' ) # make_pipeline\u662f\u6307\u53ef\u4ee5\u5c07\u591a\u500bSklearn\u7684function\u4e00\u8d77\u57f7\u884c def PolynomialRegression ( degree = 2 , ** kwargs ): return make_pipeline ( PolynomialFeatures ( degree ), LinearRegression ( ** kwargs )) # \u96a8\u6a5f\u5b9a\u7fa9\u65b0\u7684x,y\u503c def make_data ( N , err = 1 , rseed = 42 ): rng = np . random . RandomState ( rseed ) x = rng . rand ( N , 1 ) ** 2 y = 10 - 1 / ( x . ravel () + 0.1 ) if err > 0 : y += err * rng . randn ( N ) return x , y X , y = make_data ( 100 ) \u8a13\u7df4\u8cc7\u6599\u8207\u6e2c\u8a66\u8cc7\u6599\u90fd\u5efa\u7acb\u5b8c\u6210\u5f8c\u3002\u6211\u5011\u5c31\u53ef\u4ee5\u5c07\u8a13\u7df4\u8cc7\u6599\u4e1f\u5165\u5efa\u7acb\u597d\u7684 PolynomialRegression() \u4e26\u9032\u884c\u6578\u64da\u64ec\u5408\u3002\u4e0b\u9762\u7bc4\u4f8b\u7a0b\u5f0f\u4e2d\u6211\u5011\u6f14\u793a degree \u7b49\u65bc 1\u30013\u30019\uff0c\u4e26\u4f86\u67e5\u770b\u96a8\u8457\u6b21\u65b9\u6578\u7684\u589e\u9577\u5c0d\u65bc\u6a21\u578b\u7684\u64ec\u5408\u7a0b\u5ea6\u7684\u5f71\u97ff\u3002 # \u6e2c\u8a66\u8cc7\u6599\u96c6 x_test = np . linspace ( - 0.1 , 1.1 , 500 )[:, None ] # \u7e6a\u88fd\u771f\u5be6\u7b54\u6848\u7684\u5206\u4f48 plt . scatter ( X . ravel (), y , color = 'black' ) # \u6e2c\u8a66 1,3,7 \u7684degree for degree in [ 1 , 3 , 9 ]: y_test = PolynomialRegression ( degree ) . fit ( X , y ) . predict ( x_test ) plt . plot ( x_test . ravel (), y_test , label = 'degree= {} ' . format ( degree )) plt . xlim ( - 0.1 , 1.0 ) plt . ylim ( - 2 , 12 ) plt . 
legend ( loc = 'best' ) \u5f9e\u8a13\u7df4\u7d50\u679c\u53ef\u4ee5\u767c\u73fe\u96a8\u8457\u6b21\u65b9\u6578 degree \u7684\u589e\u9577\u6a21\u578b\u6703\u8b8a\u5f97\u8d8a\u8907\u96dc\u3002\u540c\u6642\u5c0d\u65bc\u8a13\u7df4\u6578\u64da\u7684\u64ec\u5408\u7d50\u679c\u8d8a\u597d\u3002\u4f46\u662f\u9019\u88e1\u5fc5\u9808\u6ce8\u610f\u4e26\u975e\u8d8a\u5927\u7684 degree \u5c31\u662f\u8d8a\u597d\u7684\uff0c\u56e0\u70ba\u96a8\u8457\u6a21\u578b\u8907\u96dc\u6703\u6709\u904e\u5ea6\u64ec\u5408\u7684\u8de1\u8c61\u3002\u56e0\u6b64\u6211\u5011\u5fc5\u9808\u627e\u51fa\u4e00\u500b\u9069\u7576\u7684 degree \u6578\u503c\u4e26\u8207\u6e2c\u8a66\u96c6\u9a57\u8b49\u8207\u8a55\u4f30\u3002\u76ee\u6a19\u662f\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684 MSE \u5dee\u8ddd\u8981\u8d8a\u5c0f\u8d8a\u597d\u3002\u5982\u679c\u6211\u5011\u4e00\u6627\u7684\u8ffd\u6c42\u8a13\u7df4\u96c6\u7684\u640d\u5931\u6700\u5c0f\u5316\uff0c\u53ef\u80fd\u6703\u5f71\u97ff\u5230\u6e2c\u8a66\u96c6\u7684\u8868\u73fe\u80fd\u529b\u5c0e\u81f4\u9810\u6e2c\u7d50\u679c\u8b8a\u5dee\u3002 Gradient descent (\u68af\u5ea6\u4e0b\u964d\u6cd5) \u63a5\u4e0b\u4f86\u6211\u5011\u4f86\u8a0e\u8ad6\u512a\u5316\u554f\u984c\u7684\u7b2c\u4e8c\u7a2e\u65b9\u6cd5\uff0c\u5c31\u662f\u68af\u5ea6\u4e0b\u964d\u6cd5\u3002\u68af\u5ea6\u4e0b\u964d\u4e0d\u50c5\u9650\u65bc\u7dda\u6027\u8ff4\u6b78\uff0c\u5728\u975e\u7dda\u6027\u548c\u795e\u7d93\u7db2\u7d61\u540c\u6a23\u9069\u7528\u3002\u4e0b\u5716\u4e2d\u6bcf\u4e00\u500b\u9ede\u662f\u8a13\u7df4\u96c6\u7684\u6a23\u672c x \u8ef8\u70ba\u8f38\u5165\u503c y \u8ef8\u70ba\u8f38\u51fa\u503c\u3002\u4e5f\u5c31\u662f\u5e73\u9762\u4e0a\u6bcf\u500b\u9ede x \u90fd\u6703\u6709\u4e00\u500b\u76f8\u5c0d\u61c9 y \u7684\u8f38\u51fa\uff0c\u56e0\u6b64\u6211\u5011\u9700\u8981\u505a\u7684\u4e8b\u60c5\u662f\u70ba\u9019\u4e9b\u9ede\u8a13\u7df4\u4e00\u500b\u6a21\u578b\uff0c\u4f7f\u5f97\u9019\u689d\u76f4\u7dda\u80fd\u5920\u76e1\u53ef\u80fd\u53cd\u6620\u51fa x \u8207 y 
\u4e4b\u9593\u7684\u95dc\u4fc2\u3002\u6b64\u5916\u6211\u5011\u90fd\u77e5\u9053\u5728\u4e00\u500b\u4e8c\u7dad\u7a7a\u9593\u4e2d\u6211\u5011\u80fd\u627e\u5230\u7121\u6578\u689d\u76f4\u7dda\uff0c\u90a3\u6211\u5011\u8a72\u5982\u4f55\u627e\u5230\u9019\u689d\u6700\u4f73\u7684\u76f4\u7dda\u5462\uff1f\u7c21\u55ae\u4f86\u8aaa\u6211\u5011\u7684\u76ee\u6a19\u662f\u8981\u4f7f\u5f97\u9019\u4e9b\u8a13\u7df4\u8cc7\u6599\u4e2d\u7684\u6bcf\u500b\u6a23\u672c\u9ede\u5230\u9019\u4e00\u689d\u76f4\u7dda\u7684\u8ddd\u96e2\u5e73\u65b9\u548c\u8981\u6700\u5c0f\u3002\u56e0\u6b64\u9019\u88e1\u6211\u5011\u5c07\u8a0e\u8ad6\u8a72\u5982\u4f55\u4f7f\u7528\u68af\u5ea6\u4e0b\u964d\u6cd5\u4f86\u6700\u4f73\u5316\u6211\u5011\u7684\u6a21\u578b\u3002\u9996\u5148\u6211\u5011\u5047\u8a2d\u4e00\u500b\u76f4\u7dda\u7684\u65b9\u7a0b\u5f0f\u662f y = \u03b2 0 + \u03b2 1 x\u3002\u90a3\u9996\u5148\u6211\u5011\u53ef\u4ee5\u5148\u96a8\u6a5f\u7684\u7d66\u4e88 \u03b2 0 \u548c \u03b2 1 \u4e00\u500b\u521d\u59cb\u503c\u3002\u4e26\u5f97\u5230\u4e0b\u5716\u4e2d\u7684\u7d50\u679c\uff0c\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u9019\u4e00\u689d\u76f4\u7dda\u4e26\u4e0d\u80fd\u53cd\u6620\u51fa x \u548c y \u7684\u95dc\u806f\u6027\u3002 \u5982\u679c\u6211\u5011\u4e0d\u65b7\u7684\u8fed\u4ee3\uff0c\u6bcf\u4e00\u6b21\u7684\u8fed\u4ee3\u90fd\u8b93\u9019\u4e00\u689d\u76f4\u7dda\u671d\u8457\u66f4\u7b26\u5408\u6578\u64da\u9ede\u7684\u65b9\u5411\u79fb\u52d5\u4e00\u9ede\uff0c\u90a3\u9ebc\u7d93\u904e\u8a31\u591a\u6b21\u7684\u66f4\u65b0\u6211\u5011\u5c31\u53ef\u4ee5\u5f97\u5230\u6700\u4f73\u7684\u7d50\u679c\u3002\u7c21\u55ae\u4f86\u8aaa\u5c31\u662f\u5728\u6bcf\u6b21\u7684\u8fed\u4ee3\u8981\u66f4\u65b0\u6240\u6709\u7684\u53c3\u6578\uff0c\u4f8b\u5982\uff1a \u03b2 0 \u548c \u03b2 1 \uff0c\u76f4\u5230\u5f97\u5230\u6700\u5c0f\u7684 MSE 
\u6216\u662f\u9810\u5b9a\u7684\u8fed\u4ee3\u6b21\u6578\u3002\u4ee5\u4e0b\u7684\u516c\u5f0f\u5c31\u662f\u68af\u5ea6\u4e0b\u964d\u6cd5\u7684\u8868\u9054\u5f0f\u3002\u5b83\u53cd\u6620\u7684\u662f\u6bcf\u6b21\u8fed\u4ee3\uff0c\u6211\u5011\u7684 \u03b2 0 \u548c \u03b2 1 \u9019\u4e9b\u53c3\u6578\u662f\u5982\u4f55\u8abf\u6574\u7684\u3002\u6211\u5011\u53ef\u4ee5\u5f9e\u9019\u500b\u516c\u5f0f\u5f97\u77e5\uff0c\u4ed6\u662f\u5c0d\u640d\u5931\u51fd\u6578\u6c42\u4e86\u67d0\u4e00\u500b\u7279\u5b9a\u53c3\u6578\u7684\u504f\u5c0e\u3002\u9019\u5c31\u662f\u6240\u8b02\u7684\u68af\u5ea6\uff0c\u6211\u5011\u671d\u8457\u68af\u5ea6\u7684\u53cd\u65b9\u5411\u5728\u66f4\u65b0\u3002\u7136\u800c\u6bcf\u4e00\u6b21\u8981\u66f4\u65b0\u591a\u5927\u53ef\u4ee5\u4f9d\u9760 \u03b7\uff08(eta) \u4f86\u63a7\u5236\uff0c\u56e0\u6b64\u6211\u5011\u7b97\u51fa\u4f86\u7684\u68af\u5ea6\u9084\u6703\u4e58\u4e0a\u4e00\u500b\u5b78\u7fd2\u901f\u7387\u4f86\u9632\u6b62\u66f4\u65b0\u6b65\u4f10\u592a\u5927\u800c\u5c0e\u81f4\u627e\u4e0d\u5230\u89e3\u3002\u6240\u4ee5 \u03b7 \u7684\u5927\u5c0f\u8981\u9069\u4e2d\u4ee5\u514d\u5f71\u97ff\u5230\u6a21\u578b\u6700\u7d42\u7684\u6536\u6582\u3002 \u6b64\u5916\u9019\u500b\u6a21\u578b\u5982\u679c\u900f\u904e\u68af\u5ea6\u4e0b\u964d\u6cd5\u9084\u6709\u4e00\u500b\u7f3a\u9ede\uff0c\u90a3\u5c31\u662f\u7576\u6211\u5011\u7684\u640d\u5931\u51fd\u6578\u4e0d\u662f\u4e00\u500b\u51f8\u51fd\u6578(convex function) \u7684\u6642\u5019\u5b83\u5c31\u6703\u5b58\u5728\u8a31\u591a\u500b\u6700\u4f4e\u9ede\uff0c\u9032\u800c\u5c0e\u81f4\u5728\u6211\u5011\u9078\u64c7\u4e0d\u540c\u7684 \u03b2 0 \u548c \u03b2 1 \u4f5c\u70ba\u521d\u59cb\u503c\u7684\u6642\u5019\u5f88\u53ef\u80fd\u6703\u6536\u6582\u65bc\u4e0d\u540c\u7684\u5c40\u90e8\u6700\u4f73\u89e3(local optimum)\u3002\u4e5f\u5c31\u662f\u8aaa\u6211\u5011\u6c42\u5f97\u7684\u6700\u4f73\u7684\u6a21\u578b\u5f88\u6709\u6a5f\u6703\u662f\u5c40\u90e8\u6700\u4f73\u89e3\u800c\u4e0d\u662f\u5168\u5c40\u6700\u4f73\u89e3(global optimum)\u3002 \u4f7f\u7528 Sklearn SGDRegressor 
Sklearn \u63d0\u4f9b\u4e86 SGDRegressor \u4e26\u5be6\u73fe\u4e86\u96a8\u6a5f\u68af\u5ea6\u4e0b\u964d\u5b78\u7fd2\u3002\u4f60\u53ef\u80fd\u6703\u554f\u68af\u5ea6\u4e0b\u964d\u8207\u96a8\u6a5f\u68af\u5ea6\u4e0b\u964d\u5169\u8005\u5dee\u5225\u5728\u54ea\uff1f\u7c21\u55ae\u4f86\u8aaa\u4e00\u822c\u7684\u68af\u5ea6\u4e0b\u964d\u6cd5\u662f\u4e00\u6b21\u7528\u5168\u90e8\u8a13\u7df4\u96c6\u7684\u6578\u64da\u8a08\u7b97\u640d\u5931\u51fd\u6578\u7684\u68af\u5ea6\uff0c\u7136\u5f8c\u505a\u4e00\u6b21\u53c3\u6578\u7684\u66f4\u65b0\u4fee\u6b63\u3002\u800c\u96a8\u6a5f\u68af\u5ea6\u4e0b\u964d\u6cd5\u5c31\u662f\u4e00\u6b21\u8dd1\u4e00\u500b\u6a23\u672c\u6216\u662f\u5c0f\u6279\u6b21\u6a23\u672c\uff0c\u7136\u5f8c\u7b97\u51fa\u4e00\u6b21\u68af\u5ea6\u4e26\u66f4\u65b0\u3002\u800c\u6240\u8b02\u7684\u96a8\u6a5f\u5c31\u662f\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u96a8\u6a5f\u5730\u62bd\u53d6\u6a23\u672c\uff0c\u6240\u4ee5\u624d\u6703\u7a31\u70ba\u96a8\u6a5f\u68af\u5ea6\u4e0b\u964d\u6cd5\u3002 import numpy as np from sklearn.linear_model import SGDRegressor from sklearn.metrics import mean_squared_error # \u96a8\u6a5f\u7522\u751f\u4e00\u500b\u7279\u5fb5\u7684X\u8207\u8f38\u51fay X , y = make_data ( 100 ) # \u5efa\u7acb SGDRegressor \u4e26\u8a2d\u7f6e\u8d85\u53c3\u6578 regModel = SGDRegressor ( max_iter = 100 ) # \u8a13\u7df4\u6a21\u578b regModel . fit ( X , y ) # \u5efa\u7acb\u6e2c\u8a66\u8cc7\u6599 x_test = np . linspace ( - 0.05 , 1 , 500 )[:, None ] # \u9810\u6e2c\u6e2c\u8a66\u96c6 y_test = regModel . predict ( x_test ) # \u9810\u6e2c\u8a13\u7df4\u96c6 y_pred = regModel . predict ( X ) # \u8996\u89ba\u5316\u9810\u6e2c\u7d50\u679c plt . scatter ( X , y ) plt . plot ( x_test . ravel (), y_test , color = \"#d62728\" ) plt . xlabel ( 'x' ) plt . ylabel ( 'y' ) plt . text ( 0 , 10 , 'Loss(MSE)= %.3f ' % mean_squared_error ( y_pred , y ), fontdict = { 'size' : 15 , 'color' : 'red' }) plt . 
show () \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 8] \u7dda\u6027\u8ff4\u6b78 (Linear Regression)"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#day-8-linear-regression","text":"## \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 - \u8a8d\u8b58\u7dda\u6027\u8ff4\u6b78 - \u900f\u904e\u6a5f\u5668\u5b78\u7fd2\u4f86\u627e\u51fa\u4e00\u689d\u51fd\u5f0f\uff0c\u4f86\u6700\u4f73\u5316\u6a21\u578b - \u5169\u7a2e\u6c42\u89e3\u65b9\u6cd5 - \u7dda\u6027\u8ff4\u6b78\u7a0b\u5f0f\u624b\u628a\u624b - \u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u3001\u591a\u5143\u8ff4\u6b78\u3001\u975e\u7dda\u6027\u8ff4\u6b78 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"[Day 8] \u7dda\u6027\u8ff4\u6b78 (Linear Regression)"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#_1","text":"\u7dda\u6027\u8ff4\u6b78\u662f\u7d71\u8a08\u4e0a\u5728\u627e\u591a\u500b\u81ea\u8b8a\u6578\u548c\u4f9d\u8b8a\u6578\u4e4b\u9593\u7684\u95dc\u4fc2\u6240\u5efa\u51fa\u4f86\u7684\u6a21\u578b\u3002\u53ea\u6709\u4e00\u500b\u81ea\u8b8a\u6578(x)\u548c\u4e00\u500b\u4f9d\u8b8a\u6578(y)\u7684\u60c5\u5f62\u7a31\u70ba\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78\u5927\u65bc\u4e00\u500b\u81ea\u8b8a\u6578(x 1 ,x 2 ,...)\u7684\u60c5\u5f62\u7a31\u70ba\u591a\u5143\u8ff4\u6b78\u3002 \u4e00\u500b\u7c21\u55ae\u7dda\u6027\u8ff4\u6b78: y=ax+b\uff0c\u5176\u4e2d b\uff1a\u622a\u8ddd(Intercept)\uff0ca\uff1a\u659c\u7387(Slope) \u70ba x \u8b8a\u52d5\u4e00\u500b\u55ae\u4f4d y \u8b8a\u52d5\u7684\u91cf\uff0c\u5982\u4e0b\u5716: \u8ff4\u6b78\u5206\u6790\u7684\u76ee\u6a19\u51fd\u6578\u6216\u7a31\u640d\u5931\u51fd\u6578(loss function)\u5c31\u662f\u5e0c\u671b\u627e\u5230\u7684\u6a21\u578b\u6700\u7d42\u7684\u6b98\u5dee\u8d8a\u5c0f\u8d8a\u597d\uff0c\u4f86\u627e\u53c3\u6578 a \u548c b\u3002","title":"\u8a8d\u8b58\u7dda\u6027\u8ff4\u6b78"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#_2","text":"\u7dda\u6027\u6a21\u578b\u6700\u5e38\u898b\u7684\u89e3\u6cd5\u6709\u5169\u7a2e\uff0c\u5206\u5225\u70ba 
Closed-form (\u9589\u5f0f\u89e3) \u8207\u68af\u5ea6\u4e0b\u964d (Gradient descent)\u3002\u7576\u7279\u5fb5\u5c11\u6642\u4f7f\u7528 Closed-form \u8f03\u70ba\u9069\u5408\uff0c\u4f7f\u7528\u4e0b\u9762\u516c\u5f0f\u4f86\u6c42\u51fa \u03b8 \u503c\u3002\u6211\u5011\u53c8\u53ef\u4ee5\u8aaa\u7dda\u6027\u6a21\u578b\u7684\u6700\u5c0f\u5e73\u65b9\u6cd5\u7684\u89e3\u5373\u70ba Closed-form\u3002\u82e5\u7576\u662f\u8907\u96dc\u7684\u554f\u984c\u6642 Gradient descen \u8f03\u80fd\u89e3\u6c7a\uff0c\u5176\u539f\u56e0\u662f\u5927\u90e8\u5206\u7684\u554f\u984c\u5176\u5be6\u662f\u6c92\u6709\u516c\u5f0f\u89e3\u7684\u3002\u6211\u5011\u53ea\u80fd\u6c42\u51fa\u4e00\u500b\u51fd\u6578 f(x) \u4f7f\u5176\u8aa4\u5dee\u6700\u5c0f\u8d8a\u597d\u3002 Closed-form Gradient descent ## Least Square Method (\u6700\u5c0f\u5e73\u65b9\u6cd5) \u5047\u8a2d\u4e00\u500b\u5730\u5340\u7684\u623f\u50f9\u8207\u576a\u6578\u662f\u5448\u7dda\u6027\u95dc\u4fc2\uff0c\u4e26\u4ee5\u4e0b\u5716\u4e2d\u7684\u4e09\u500b\u9ede\u8868\u793a\u3002\u5982\u679c\u6211\u5011\u60f3\u900f\u904e\u623f\u5b50\u7684\u576a\u6578\u4f86\u9810\u6e2c\u623f\u50f9\uff0c\u90a3\u9ebc\u6211\u5011\u7684\u76ee\u6a19\u5c31\u662f\u627e\u5230\u4e00\u689d\u76f4\u7dda\uff0c\u4e26\u8207\u5ea7\u6a19\u5e73\u9762\u4e0a\u9019\u4e09\u500b\u9ede\u7684\u5dee\u8ddd\u8d8a\u5c0f\u8d8a\u597d\u3002\u90a3\u9019\u689d\u76f4\u7dda\u8a72\u600e\u9ebc\u627e\u5462\uff1f\u9996\u5148\u6211\u5011\u96a8\u6a5f\u627e\u4e00\u689d\u76f4\u7dda\uff0c\u4e26\u8a08\u7b97\u9019\u4e09\u9ede\u7684 loss\u3002\u640d\u5931\u51fd\u6578\u53ef\u4ee5\u81ea\u5df1\u5b9a\u7fa9\uff0c\u5047\u8a2d\u6211\u5011\u4f7f\u7528 MSE \u5747\u65b9\u8aa4\u5dee\u4f86\u8a08\u7b97\u3002\u900f\u904e\u4e00\u7cfb\u5217\u8a08\u7b97\u6211\u5011\u5f97\u5230\u4e00\u500b loss \u5373\u70ba MSE \u503c\u3002\u63a5\u8457\u6211\u5011\u5c07\u9019\u500b\u76f4\u7dda\u7a0d\u7a0d\u7684\u8f49\u4e00\u500b\u89d2\u5ea6\u5f8c\u53c8\u53ef\u4ee5\u8a08\u7b97\u4e00\u500b\u65b0\u7684 MSE\uff0c\u6b64\u523b\u6211\u5011\u53ef\u4ee5\u767c\u73fe MSE 
\u503c\u53c8\u6bd4\u525b\u525b\u66f4\u5c0f\u4e86\u3002\u4e5f\u5c31\u662f\u8aaa\u9019\u4e00\u689d\u65b0\u7684\u76f4\u7dda\u80fd\u5920\u66f4\u6cd5\u61c9\u51fa\u8a13\u7df4\u96c6\u4e2d A\u3001B\u3001C \u7684\u6578\u64da\u9ede\u6240\u53cd\u6620\u7684\u623f\u5c4b\u576a\u6578\u8207\u623f\u50f9\u4e4b\u9593\u7684\u7dda\u6027\u95dc\u4fc2\u3002 \u7c21\u55ae\u4f86\u8aaa\u6211\u5011\u5728\u4e00\u500b\u4e8c\u7dad\u7a7a\u9593\u4e2d\uff0c\u6211\u5011\u53ef\u4ee5\u627e\u5230\u7121\u6578\u689d\u76f4\u7dda\u3002\u73fe\u5728\u6211\u5011\u80fd\u505a\u7684\u4e8b\u60c5\u5c31\u662f\u5f9e\u9019\u7121\u6578\u689d\u76f4\u7dda\u4e2d\u9078\u51fa\u4e00\u689d\u6700\u4f73\u7684\u7576\u4f5c\u6211\u5011\u7684\u9810\u6e2c\u6a21\u578b\uff0c\u540c\u6642\u5b83\u9762\u5c0d\u9019\u4e09\u9ede\u7684\u8aa4\u5dee\u662f\u8981\u6700\u5c0f\u7684\u3002\u56e0\u6b64\u6211\u5011\u7684\u76ee\u6a19\u5c31\u662f\u8981\u6700\u5c0f\u5316 MSE \u4e5f\u5c31\u662f\u6240\u8b02\u7684\u640d\u5931\u51fd\u6578 (loss function)\u3002\u6240\u4ee5\u6574\u500b\u7dda\u6027\u8ff4\u6b78\u7684\u76ee\u6a19\u5c31\u662f\u6700\u5c0f\u5316\u6211\u5011\u7684\u640d\u5931\u51fd\u6578\uff0c\u5176\u4e2d\u4e00\u500b\u89e3\u6cd5\u5c31\u662f\u6700\u5c0f\u5e73\u65b9\u6cd5\u3002\u56e0\u70ba MSE \u7b49\u65bc 1/n \u500d\u7684\u6b8b\u5dee\u5e73\u65b9\u548c (RSS)\uff0c\u5176\u4e2d\u5206\u6bcd n \u70ba\u5e38\u6578\uff0c\u4e0d\u5f71\u97ff\u6975\u5c0f\u5316\u6545\u62ff\u6389\u3002\u56e0\u6b64\u6700\u7d42\u7684\u6c42\u89e3\u662f\u6eff\u8db3\u6700\u5c0f\u5316\u5e73\u65b9\u548c\uff0c\u4f7f\u5176\u6700\u5c0f\u5316\u3002\u7d93\u904e\u6578\u5b78\u63a8\u5c0e\u5f8c\uff0c\u7c21\u5316\u7684\u516c\u5f0f\u5982\u4e0b\uff1a","title":"\u5169\u7a2e\u6c42\u89e3\u65b9\u6cd5"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#_3","text":"\u57fa\u65bc\u4e0a\u9762\u7684\u516c\u5f0f\u6211\u5011\u60f3\u627e\u51fa\u4e00\u7d44\u53c3\u6578\u6b0a\u91cd \u03b8\u3002\u4e5f\u5c31\u662f\u4e0b\u5716\u554f\u984c\u4e2d\u7684 a (\u03b8 0 )\u3001b (\u03b8 1 ) 
\u5169\u53c3\u6578\uff0c\u4f7f\u5f97\u5e73\u9762\u4e0a\u9019\u4e09\u9ede\u5e73\u65b9\u548c\u6709\u6975\u5c0f\u503c\u3002\u9019\u500b\u51fd\u5f0f\u5c0d \u03b8 0 , \u03b8 1 \u504f\u505a\u5fae\u5206\u8a2d\u4ed6\u5011\u70ba0\uff0c\u63a5\u8457\u6211\u5011\u5c0d\u65b9\u7a0b\u5f0f\u6c42\u89e3\u3002 \u6b64\u51fd\u5f0f\u53ea\u6709\u6975\u5c0f\u503c\uff0c\u56e0\u6b64\u6211\u5011\u5f97\u5230\u7684 \u03b8 0 , \u03b8 1 \u6700\u5c0f\u6975\u503c\u7684\u89e3\u3002","title":"\u5c0f\u8a66\u8eab\u624b"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#_4","text":"","title":"\u7bc4\u4f8b\u7a0b\u5f0f (\u623f\u50f9\u9810\u6e2c)"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#_5","text":"\u6211\u5011\u900f\u904e Sklearn \u6240\u63d0\u4f9b\u7684\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6\u9032\u884c\u7dda\u6027\u8ff4\u6b78\u6a21\u578b\u5efa\u6a21\uff0c\u4e26\u63a1\u7528\u6700\u5c0f\u5e73\u6cd5\u3002\u9996\u5148\u70ba\u4e86\u8981\u9a57\u8b49\u6211\u5011\u4e0a\u9762\u7684\u516c\u5f0f\uff0c\u56e0\u6b64\u6211\u5011\u5148\u5229\u7528 Numpy \u5957\u4ef6\u81ea\u5df1\u624b\u523b\u505a\u4e00\u7cfb\u5217\u7684\u77e9\u9663\u904b\u7b97\u6c42\u51fa\u6bcf\u4e00\u9805\u7684\u4fc2\u6578\u8207\u622a\u8ddd\u3002 import numpy as np import pandas as pd from sklearn.metrics import mean_squared_error from sklearn.datasets import load_boston # \u8f09\u5165 Sklearn \u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6 13\u500b\u8f38\u5165\u7279\u5fb5 1\u500b\u8f38\u51fa\u7279\u5fb5 boston_dataset = load_boston () # \u8f38\u5165\u7279\u5fb5\u517113\u500b X = boston_dataset . data # \u8a2d\u5b9a\u622a\u8ddd\u9805 b \u6b0a\u91cd\u503c\u70ba 1 b = np . ones (( X . shape [ 0 ], 1 )) # \u6dfb\u52a0\u5e38\u6578\u9805\u7279\u5fb5\uff0c\u6700\u7d42\u6709 13+1 \u500b\u8f38\u5165\u7279\u5fb5 X = np . hstack (( X , b )) # \u8f38\u51fa(\u623f\u50f9) y = boston_dataset . target # \u8a08\u7b97 Beta (@ \u70ba numpy \u4e2d 2-D arrays \u7684\u77e9\u9663\u4e58\u6cd5) Beta = np . linalg . inv ( X . T @ X ) @ X . 
T @ y y_pred = X @ Beta # MSE: 21.8948311817292 print ( 'MSE:' , mean_squared_error ( y_pred , y )) \u8a08\u7b97\u51fa\u4f86 Beta \u5f8c\u6211\u5011\u518d\u628a\u6240\u6709\u7684 X \u5e36\u5165\u4e26\u505a\u8a08\u7b97\uff0c\u7b97\u51fa\u4f86\u7684\u7d50\u679c MSE \u70ba 21.89\u3002\u6700\u5f8c\u6211\u5011\u53ef\u4ee5\u8a66\u8457\u628a Beta \u8b8a\u6578\u5217\u5370\u51fa\u4f86\u3002\u7e3d\u5171\u6703\u6709 14 \u500b\u53c3\u6578\uff0c\u7531 13 \u500b\u8f38\u5165\u7279\u5fb5\u4fc2\u6578\u8207\u6700\u5f8c\u4e00\u9805\u622a\u8ddd\u6240\u7d44\u6210\u7684\u3002 \u8f38\u51fa\u7d50\u679c\uff1a array([-1.08011358e-01, 4.64204584e-02, 2.05586264e-02, 2.68673382e+00, -1.77666112e+01, 3.80986521e+00, 6.92224640e-04, -1.47556685e+00, 3.06049479e-01, -1.23345939e-02, -9.52747232e-01, 9.31168327e-03, -5.24758378e-01, 3.64594884e+01])","title":"\u624b\u523b\u7dda\u6027\u8ff4\u6b78"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#sklearn-linearregression","text":"\u7dda\u6027\u8ff4\u6b78\u7c21\u55ae\u4f86\u8aaa\uff0c\u5c31\u662f\u5c07\u8907\u96dc\u7684\u8cc7\u6599\u6578\u64da\uff0c\u64ec\u548c\u81f3\u4e00\u689d\u76f4\u7dda\u4e0a\uff0c\u5c31\u80fd\u65b9\u4fbf\u9810\u6e2c\u672a\u4f86\u7684\u8cc7\u6599\u3002\u63a5\u4e0b\u4f86\u6211\u5011\u4e00\u6a23\u4f7f\u7528\u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6\uff0c\u4e26\u4f7f\u7528 Sklearn \u63d0\u4f9b\u7684 LinearRegression \u4f86\u6c42\u89e3\u3002 Parameters: - fit_intercept: \u662f\u5426\u6709\u622a\u8ddd\uff0c\u5982\u679c\u6c92\u6709\u5247\u76f4\u7dda\u904e\u539f\u9ede\u3002 Attributes: - coef_: \u53d6\u5f97\u4fc2\u6578\u3002 - intercept_: \u53d6\u5f97\u622a\u8ddd\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - score: R2 score \u6a21\u578b\u8a55\u4f30\u3002 import numpy as np import pandas as pd from sklearn.linear_model import LinearRegression from sklearn.metrics import mean_squared_error from sklearn.datasets import load_boston # 
\u8f09\u5165 Sklearn \u623f\u50f9\u9810\u6e2c\u8cc7\u6599\u96c6 13\u500b\u8f38\u5165\u7279\u5fb5 1\u500b\u8f38\u51fa\u7279\u5fb5 boston_dataset = load_boston () # \u8f38\u5165\u7279\u5fb5\u517113\u500b X = boston_dataset . data # \u8f38\u51fa(\u623f\u50f9) y = boston_dataset . target # \u8a13\u7df4\u6a21\u578b linearModel = LinearRegression () linearModel . fit ( X , y ) y_pred = linearModel . predict ( X ) # 21.894831181729202 print ( 'MSE:' , mean_squared_error ( y_pred , y )) Sklearn \u7684 LinearRegression \u6a21\u578b\u4e5f\u662f\u63a1\u7528\u5c0f\u5e73\u65b9\u6cd5\u6c42\u89e3\u3002\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u5176 MSE \u8207\u7a0d\u65e9\u624b\u523b\u7684\u65b9\u6cd5\u76f8\u7576\u5f88\u63a5\u8fd1\u3002\u53e6\u5916 Sklearn \u6a21\u578b\u540c\u6642\u4e5f\u63d0\u4f9b\u4e86 coef_ \u548c intercept_ \u5169\u500b\u5c6c\u6027\u53ef\u4ee5\u53d6\u5f97\u6a21\u578b\u7684\u7279\u5fb5\u4fc2\u6578\u8207\u622a\u8ddd\u3002","title":"\u4f7f\u7528 Sklearn LinearRegression"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#_6","text":"\u5c0d\u65bc\u7dda\u6027\u8ff4\u6b78\u4f86\u8aaa\uff0c\u8cc7\u6599\u90fd\u662f\u5f88\u5747\u52fb\u5730\u5206\u5e03\u5728\u4e00\u689d\u76f4\u7dda\u4e0a\uff0c\u4f46\u73fe\u5be6\u7684\u8cc7\u6599\u5f80\u5f80\u662f\u975e\u7dda\u6027\u7684\u5206\u4f48\u3002\u5982\u679c\u6211\u5011\u4e00\u6a23\u4f7f\u7528\u4e0a\u8ff0\u65b9\u6cd5\u53d6\u5f97\u7dda\u6027\u6a21\u578b\uff0c\u5728\u5be6\u969b\u5834\u57df\u4e0a\u9810\u6e2c\u6548\u679c\u53ef\u80fd\u4e26\u4e0d\u5927\u3002 \u591a\u9805\u5f0f\u8ff4\u6b78\u4e2d\uff0c\u6578\u64da\u4e0d\u592a\u5177\u6709\u7dda\u6027\u95dc\u4fc2\uff0c\u56e0\u6b64\u61c9\u5c0b\u627e\u4e00\u4e9b\u975e\u7dda\u6027\u66f2\u7dda\u53bb\u64ec\u5408\u3002\u5c0d\u65bc\u4ee5\u4e0a\u7684\u6578\u64da\uff0c\u539f\u672c\u662f\u53ea\u6709\u4e00\u500b x 
\u7279\u5fb5\uff0c\u4f46\u662f\u6211\u5011\u53ef\u4ee5\u5efa\u69cb\u8a31\u591a\u65b0\u7684\u7279\u5fb5\u3002\u5982\u4e0b\u5716\uff0c\u7528\u4e00\u689d\u4e09\u6b21\u66f2\u7dda\u53bb\u64ec\u5408\u6578\u64da\u6548\u679c\u66f4\u597d\u3002\u6211\u5011\u5c07\u4e09\u6b21\u51fd\u6578\u770b\u6210 ax 3 +bx 2 +cx+d\u3002\u9019\u6a23\u5c31\u53c8\u8b8a\u6210\u89e3\u591a\u5143\uff0c\u5176\u6211\u5011\u5c31\u662f\u8981\u627e\u51fa a\u3001b\u3001c\u3001d \u4f7f\u5176\u640d\u5931\u51fd\u6578\u6700\u5c0f\u3002","title":"\u591a\u9805\u5f0f\u7684\u8ff4\u6b78\u6a21\u578b"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#_7","text":"\u5f9e\u4e0a\u8ff0\u554f\u984c\u4e2d\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u7dda\u6027\u8ff4\u6b78\u5728\u5be6\u52d9\u4e0a\u6240\u9762\u81e8\u7684\u554f\u984c\u3002\u9996\u5148\u6211\u5011\u4f86\u8ff4\u9867\u4e00\u4e0b\u7a0d\u65e9\u6240\u63d0\u5230\u7684\u7dda\u6027\u65b9\u7a0b\u5f0f\uff0c\u9019\u7d44\u7dda\u6027\u65b9\u7a0b\u5f0f\u8aaa\u660e\u4e86\u6bcf\u500b\u7279\u5fb5 x \u4e00\u6b21\u65b9\u8207\u76ee\u6a19\u503c\u662f\u6709\u4e00\u500b\u7dda\u6027\u7684\u95dc\u4fc2\u3002 y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 2 + ... 
+ \u03b2 n x n \u63a5\u8457\u6211\u5011\u518d\u4f86\u770b\u4e00\u4e0b\u53e6\u4e00\u500b\u4f8b\u5b50\uff0c\u6bd4\u5982\u8aaa\u7279\u5fb5 x 1 \u8207\u76ee\u6a19\u503c\u5b58\u5728\u8457\u4ee5\u4e0b\u7684\u95dc\u4fc2\u3002\u6211\u5011\u767c\u73fe\u9019\u7d44\u65b9\u7a0b\u5f0f\u5df2\u7d93\u4e0d\u662f\u4e00\u500b\u7dda\u6027\u95dc\u4fc2\u4e86\uff0c\u56e0\u70ba\u4ed6\u6709\u4e86 x 1 \u7684\u4e8c\u6b21\u65b9\u3002 y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 1 2 \u90a3\u9ebc\u8a72\u600e\u9ebc\u505a\u6211\u5011\u624d\u80fd\u53c8\u628a\u5b83\u8f49\u63db\u6210\u7dda\u6027\u95dc\u4fc2\u5462\uff1f\u9019\u6642\u5019\u6211\u5011\u5c31\u53ef\u4ee5\u7528\u4e00\u500b\u65b0\u7684\u7279\u5fb5 x 2 \u3002\u6211\u5011\u8b93 x 2 \u7b49\u65bc x 1 \u7684\u5e73\u65b9\uff0c\u9019\u6a23\u6211\u5011\u518d\u628a x 2 \u5e36\u8ff4\u539f\u65b9\u7a0b\u5f0f\u4e2d\u3002\u6b64\u6642\u9019\u5169\u500b\u7279\u5fb5 x 1 \u8207 x 2 \u8207\u76ee\u6a19\u503c\u53c8\u8ff4\u5230\u4e86\u7dda\u6027\u95dc\u4fc2\u3002 Let x 2 = x 1 2 => y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 2 \u540c\u6a23\u7684\u6211\u5011\u518d\u4f86\u770b\u53e6\u4e00\u500b\u4f8b\u5b50\u3002\u6211\u5011\u5982\u679c\u5f15\u5165\u4e86 x 1 \u7684\u4e09\u6b21\u65b9\u7684\u8a71\uff0c\u4ed6\u7684\u65b9\u7a0b\u5f0f\u5982\u4e0b\uff1a y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 1 2 + \u03b2 3 x 1 3 \u540c\u7406\u6211\u5011\u9019\u6642\u4e00\u6a23\u53ef\u4ee5\u5f15\u5165\u65b0\u7684\u7279\u5fb5 x 2 \u7b49\u65bc x 1 \u7684\u4e8c\u6b21\u65b9\uff0c\u4ee5\u53ca x 3 \u7b49\u65bc x 1 \u7684\u4e09\u6b21\u65b9\u3002\u9019\u6a23\u7d93\u904e\u4e00\u500b\u8f49\u63db\u4ee5\u5f8c\u6211\u5011\u7684 y \u503c\u8207\u6240\u6709\u7684\u7279\u5fb5\u9593\u4f9d\u7136\u5b58\u5728\u8457\u7dda\u6027\u95dc\u4fc2\u3002 Let x 2 = x 1 2 and x 3 = x 1 3 => y = \u03b2 0 + \u03b2 1 x 1 + \u03b2 2 x 2 + \u03b2 3 x 3 \u9019\u88e1\u505a\u4e00\u500b\u5c0f\u7d50\u3002\u6211\u5011\u53ef\u4ee5\u900f\u904e\u5f15\u5165\u8f49\u8b8a\u904e\u5f8c\u7684 x 
\u4f5c\u70ba\u4e00\u500b\u65b0\u7684\u7279\u5fb5\u4f86\u6eff\u8db3\u7dda\u6027\u5047\u8a2d\u3002\u6b64\u6642\u7684\u8ff4\u6b78\u65b9\u7a0b\u5f0f\u5c31\u662f\u4e00\u500b\u591a\u9805\u5f0f\u8ff4\u6b78(polynomial regression)\u3002","title":"\u7dda\u6027\u6a21\u578b\u7684\u64f4\u5c55"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#sklearn","text":"\u7531\u65bc Sklearn \u6c92\u6709\u5c01\u88dd\u597d\u7684\u591a\u9805\u5f0f\u8ff4\u6b78\u6a21\u578b\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb\u3002\u4e0d\u904e\u6211\u5011\u53ef\u4ee5\u900f\u904e make_pipeline \u5c07 PolynomialFeatures \u8207 LinearRegression \u5c01\u88dd\u6210\u4e00\u500b\u591a\u9805\u5f0f\u8ff4\u6b78\u6a21\u578b\uff0c\u4e26\u4e14\u4f7f\u7528\u8005\u53ef\u4ee5\u96a8\u610f\u8a2d\u5b9a degree(\u6b21\u65b9)\u503c\u3002 \u6211\u5011\u53ef\u4ee5\u5c0d\u539f\u672c\u7684\u7279\u5fb5\u9032\u884c PolynomialFeatures \u69cb\u9020\u65b0\u6a23\u672c\u7279\u5fb5\u63a1\u3002\u4e26\u5c07\u8f49\u63db\u5f8c\u7684\u7279\u5fb5\u9001\u5230\u7dda\u6027\u8ff4\u6b78\u6a21\u578b\u9032\u884c\u64ec\u5408\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u81ea\u5b9a\u7fa9\u4e00\u500b PolynomialRegression() \u7684\u51fd\u5f0f\uff0c\u4f7f\u7528\u8005\u53ef\u4ee5\u8f38\u5165 degree \u5927\u5c0f\u63a7\u5236\u6a21\u578b\u7684\u5f37\u5ea6\u3002\u5728\u9019\u500b\u51fd\u5f0f\u4e2d\u6211\u5011\u4f7f\u7528 Sklearn \u7684 pipeline \u65b9\u6cd5\u5c07 PolynomialFeatures \u7279\u5fb5\u8f49\u63db\u8207 LinearRegression \u7dda\u6027\u8ff4\u6b78\u6a21\u578b\u5c01\u88dd\u8d77\u4f86\u3002\u53e6\u5916\u4ee5\u4e0b\u7bc4\u4f8b\u662f\u900f\u904e\u81ea\u8a02\u7fa9\u7684 make_data() \u51fd\u5f0f\u7522\u751f\u4e00\u7d44\u96a8\u6a5f\u7684 x \u548c y\u3002\u8a72\u51fd\u5f0f\u4e2d\u53ef\u4ee5\u8a2d\u5b9a\u96a8\u6a5f\u8cc7\u6599\u7684\u6bd4\u6578\uff0c\u4e0b\u9762\u7a0b\u5f0f\u4e2d\u6211\u5011\u5148\u96a8\u6a5f\u5efa\u7acb 100 \u7b46\u6578\u64da\u3002 from sklearn.linear_model import LinearRegression from sklearn.preprocessing import PolynomialFeatures from sklearn.pipeline import 
make_pipeline plt . style . use ( 'seaborn' ) # make_pipeline\u662f\u6307\u53ef\u4ee5\u5c07\u591a\u500bSklearn\u7684function\u4e00\u8d77\u57f7\u884c def PolynomialRegression ( degree = 2 , ** kwargs ): return make_pipeline ( PolynomialFeatures ( degree ), LinearRegression ( ** kwargs )) # \u96a8\u6a5f\u5b9a\u7fa9\u65b0\u7684x,y\u503c def make_data ( N , err = 1 , rseed = 42 ): rng = np . random . RandomState ( rseed ) x = rng . rand ( N , 1 ) ** 2 y = 10 - 1 / ( x . ravel () + 0.1 ) if err > 0 : y += err * rng . randn ( N ) return x , y X , y = make_data ( 100 ) \u8a13\u7df4\u8cc7\u6599\u8207\u6e2c\u8a66\u8cc7\u6599\u90fd\u5efa\u7acb\u5b8c\u6210\u5f8c\u3002\u6211\u5011\u5c31\u53ef\u4ee5\u5c07\u8a13\u7df4\u8cc7\u6599\u4e1f\u5165\u5efa\u7acb\u597d\u7684 PolynomialRegression() \u4e26\u9032\u884c\u6578\u64da\u64ec\u5408\u3002\u4e0b\u9762\u7bc4\u4f8b\u7a0b\u5f0f\u4e2d\u6211\u5011\u6f14\u793a degree \u7b49\u65bc 1\u30013\u30019\uff0c\u4e26\u4f86\u67e5\u770b\u96a8\u8457\u6b21\u65b9\u6578\u7684\u589e\u9577\u5c0d\u65bc\u6a21\u578b\u7684\u64ec\u5408\u7a0b\u5ea6\u7684\u5f71\u97ff\u3002 # \u6e2c\u8a66\u8cc7\u6599\u96c6 x_test = np . linspace ( - 0.1 , 1.1 , 500 )[:, None ] # \u7e6a\u88fd\u771f\u5be6\u7b54\u6848\u7684\u5206\u4f48 plt . scatter ( X . ravel (), y , color = 'black' ) # \u6e2c\u8a66 1,3,7 \u7684degree for degree in [ 1 , 3 , 9 ]: y_test = PolynomialRegression ( degree ) . fit ( X , y ) . predict ( x_test ) plt . plot ( x_test . ravel (), y_test , label = 'degree= {} ' . format ( degree )) plt . xlim ( - 0.1 , 1.0 ) plt . ylim ( - 2 , 12 ) plt . 
legend ( loc = 'best' ) \u5f9e\u8a13\u7df4\u7d50\u679c\u53ef\u4ee5\u767c\u73fe\u96a8\u8457\u6b21\u65b9\u6578 degree \u7684\u589e\u9577\u6a21\u578b\u6703\u8b8a\u5f97\u8d8a\u8907\u96dc\u3002\u540c\u6642\u5c0d\u65bc\u8a13\u7df4\u6578\u64da\u7684\u64ec\u5408\u7d50\u679c\u8d8a\u597d\u3002\u4f46\u662f\u9019\u88e1\u5fc5\u9808\u6ce8\u610f\u4e26\u975e\u8d8a\u5927\u7684 degree \u5c31\u662f\u8d8a\u597d\u7684\uff0c\u56e0\u70ba\u96a8\u8457\u6a21\u578b\u8907\u96dc\u6703\u6709\u904e\u5ea6\u64ec\u5408\u7684\u8de1\u8c61\u3002\u56e0\u6b64\u6211\u5011\u5fc5\u9808\u627e\u51fa\u4e00\u500b\u9069\u7576\u7684 degree \u6578\u503c\u4e26\u8207\u6e2c\u8a66\u96c6\u9a57\u8b49\u8207\u8a55\u4f30\u3002\u76ee\u6a19\u662f\u8a13\u7df4\u96c6\u8207\u6e2c\u8a66\u96c6\u7684 MSE \u5dee\u8ddd\u8981\u8d8a\u5c0f\u8d8a\u597d\u3002\u5982\u679c\u6211\u5011\u4e00\u6627\u7684\u8ffd\u6c42\u8a13\u7df4\u96c6\u7684\u640d\u5931\u6700\u5c0f\u5316\uff0c\u53ef\u80fd\u6703\u5f71\u97ff\u5230\u6e2c\u8a66\u96c6\u7684\u8868\u73fe\u80fd\u529b\u5c0e\u81f4\u9810\u6e2c\u7d50\u679c\u8b8a\u5dee\u3002","title":"Sklearn \u5be6\u4f5c\u591a\u9805\u5f0f\u8ff4\u6b78"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#gradient-descent","text":"\u63a5\u4e0b\u4f86\u6211\u5011\u4f86\u8a0e\u8ad6\u512a\u5316\u554f\u984c\u7684\u7b2c\u4e8c\u7a2e\u65b9\u6cd5\uff0c\u5c31\u662f\u68af\u5ea6\u4e0b\u964d\u6cd5\u3002\u68af\u5ea6\u4e0b\u964d\u4e0d\u50c5\u9650\u65bc\u7dda\u6027\u8ff4\u6b78\uff0c\u5728\u975e\u7dda\u6027\u548c\u795e\u7d93\u7db2\u7d61\u540c\u6a23\u9069\u7528\u3002\u4e0b\u5716\u4e2d\u6bcf\u4e00\u500b\u9ede\u662f\u8a13\u7df4\u96c6\u7684\u6a23\u672c x \u8ef8\u70ba\u8f38\u5165\u503c y \u8ef8\u70ba\u8f38\u51fa\u503c\u3002\u4e5f\u5c31\u662f\u5e73\u9762\u4e0a\u6bcf\u500b\u9ede x \u90fd\u6703\u6709\u4e00\u500b\u76f8\u5c0d\u61c9 y 
\u7684\u8f38\u51fa\uff0c\u56e0\u6b64\u6211\u5011\u9700\u8981\u505a\u7684\u4e8b\u60c5\u662f\u70ba\u9019\u4e9b\u9ede\u8a13\u7df4\u4e00\u500b\u6a21\u578b\uff0c\u4f7f\u5f97\u9019\u689d\u76f4\u7dda\u80fd\u5920\u76e1\u53ef\u80fd\u53cd\u6620\u51fa x \u8207 y \u4e4b\u9593\u7684\u95dc\u4fc2\u3002\u6b64\u5916\u6211\u5011\u90fd\u77e5\u9053\u5728\u4e00\u500b\u4e8c\u7dad\u7a7a\u9593\u4e2d\u6211\u5011\u80fd\u627e\u5230\u7121\u6578\u689d\u76f4\u7dda\uff0c\u90a3\u6211\u5011\u8a72\u5982\u4f55\u627e\u5230\u9019\u689d\u6700\u4f73\u7684\u76f4\u7dda\u5462\uff1f\u7c21\u55ae\u4f86\u8aaa\u6211\u5011\u7684\u76ee\u6a19\u662f\u8981\u4f7f\u5f97\u9019\u4e9b\u8a13\u7df4\u8cc7\u6599\u4e2d\u7684\u6bcf\u500b\u6a23\u672c\u9ede\u5230\u9019\u4e00\u689d\u76f4\u7dda\u7684\u8ddd\u96e2\u5e73\u65b9\u548c\u8981\u6700\u5c0f\u3002\u56e0\u6b64\u9019\u88e1\u6211\u5011\u5c07\u8a0e\u8ad6\u8a72\u5982\u4f55\u4f7f\u7528\u68af\u5ea6\u4e0b\u964d\u6cd5\u4f86\u6700\u4f73\u5316\u6211\u5011\u7684\u6a21\u578b\u3002\u9996\u5148\u6211\u5011\u5047\u8a2d\u4e00\u500b\u76f4\u7dda\u7684\u65b9\u7a0b\u5f0f\u662f y = \u03b2 0 + \u03b2 1 x\u3002\u90a3\u9996\u5148\u6211\u5011\u53ef\u4ee5\u5148\u96a8\u6a5f\u7684\u7d66\u4e88 \u03b2 0 \u548c \u03b2 1 \u4e00\u500b\u521d\u59cb\u503c\u3002\u4e26\u5f97\u5230\u4e0b\u5716\u4e2d\u7684\u7d50\u679c\uff0c\u6211\u5011\u53ef\u4ee5\u767c\u73fe\u9019\u4e00\u689d\u76f4\u7dda\u4e26\u4e0d\u80fd\u53cd\u6620\u51fa x \u548c y \u7684\u95dc\u806f\u6027\u3002 \u5982\u679c\u6211\u5011\u4e0d\u65b7\u7684\u8fed\u4ee3\uff0c\u6bcf\u4e00\u6b21\u7684\u8fed\u4ee3\u90fd\u8b93\u9019\u4e00\u689d\u76f4\u7dda\u671d\u8457\u66f4\u7b26\u5408\u6578\u64da\u9ede\u7684\u65b9\u5411\u79fb\u52d5\u4e00\u9ede\uff0c\u90a3\u9ebc\u7d93\u904e\u8a31\u591a\u6b21\u7684\u66f4\u65b0\u6211\u5011\u5c31\u53ef\u4ee5\u5f97\u5230\u6700\u4f73\u7684\u7d50\u679c\u3002\u7c21\u55ae\u4f86\u8aaa\u5c31\u662f\u5728\u6bcf\u6b21\u7684\u8fed\u4ee3\u8981\u66f4\u65b0\u6240\u6709\u7684\u53c3\u6578\uff0c\u4f8b\u5982\uff1a \u03b2 0 \u548c \u03b2 1 
\uff0c\u76f4\u5230\u5f97\u5230\u6700\u5c0f\u7684 MSE \u6216\u662f\u9810\u5b9a\u7684\u8fed\u4ee3\u6b21\u6578\u3002\u4ee5\u4e0b\u7684\u516c\u5f0f\u5c31\u662f\u68af\u5ea6\u4e0b\u964d\u6cd5\u7684\u8868\u9054\u5f0f\u3002\u5b83\u53cd\u6620\u7684\u662f\u6bcf\u6b21\u8fed\u4ee3\uff0c\u6211\u5011\u7684 \u03b2 0 \u548c \u03b2 1 \u9019\u4e9b\u53c3\u6578\u662f\u5982\u4f55\u8abf\u6574\u7684\u3002\u6211\u5011\u53ef\u4ee5\u5f9e\u9019\u500b\u516c\u5f0f\u5f97\u77e5\uff0c\u4ed6\u662f\u5c0d\u640d\u5931\u51fd\u6578\u6c42\u4e86\u67d0\u4e00\u500b\u7279\u5b9a\u53c3\u6578\u7684\u504f\u5c0e\u3002\u9019\u5c31\u662f\u6240\u8b02\u7684\u68af\u5ea6\uff0c\u6211\u5011\u671d\u8457\u68af\u5ea6\u7684\u53cd\u65b9\u5411\u5728\u66f4\u65b0\u3002\u7136\u800c\u6bcf\u4e00\u6b21\u8981\u66f4\u65b0\u591a\u5927\u53ef\u4ee5\u4f9d\u9760 \u03b7\uff08(eta) \u4f86\u63a7\u5236\uff0c\u56e0\u6b64\u6211\u5011\u7b97\u51fa\u4f86\u7684\u68af\u5ea6\u9084\u6703\u4e58\u4e0a\u4e00\u500b\u5b78\u7fd2\u901f\u7387\u4f86\u9632\u6b62\u66f4\u65b0\u6b65\u4f10\u592a\u5927\u800c\u5c0e\u81f4\u627e\u4e0d\u5230\u89e3\u3002\u6240\u4ee5 \u03b7 \u7684\u5927\u5c0f\u8981\u9069\u4e2d\u4ee5\u514d\u5f71\u97ff\u5230\u6a21\u578b\u6700\u7d42\u7684\u6536\u6582\u3002 \u6b64\u5916\u9019\u500b\u6a21\u578b\u5982\u679c\u900f\u904e\u68af\u5ea6\u4e0b\u964d\u6cd5\u9084\u6709\u4e00\u500b\u7f3a\u9ede\uff0c\u90a3\u5c31\u662f\u7576\u6211\u5011\u7684\u640d\u5931\u51fd\u6578\u4e0d\u662f\u4e00\u500b\u51f8\u51fd\u6578(convex function) \u7684\u6642\u5019\u5b83\u5c31\u6703\u5b58\u5728\u8a31\u591a\u500b\u6700\u4f4e\u9ede\uff0c\u9032\u800c\u5c0e\u81f4\u5728\u6211\u5011\u9078\u64c7\u4e0d\u540c\u7684 \u03b2 0 \u548c \u03b2 1 \u4f5c\u70ba\u521d\u59cb\u503c\u7684\u6642\u5019\u5f88\u53ef\u80fd\u6703\u6536\u6582\u65bc\u4e0d\u540c\u7684\u5c40\u90e8\u6700\u4f73\u89e3(local 
optimum)\u3002\u4e5f\u5c31\u662f\u8aaa\u6211\u5011\u6c42\u5f97\u7684\u6700\u4f73\u7684\u6a21\u578b\u5f88\u6709\u6a5f\u6703\u662f\u5c40\u90e8\u6700\u4f73\u89e3\u800c\u4e0d\u662f\u5168\u5c40\u6700\u4f73\u89e3(global optimum)\u3002","title":"Gradient descent (\u68af\u5ea6\u4e0b\u964d\u6cd5)"},{"location":"8.\u7dda\u6027\u8ff4\u6b78/#sklearn-sgdregressor","text":"Sklearn \u63d0\u4f9b\u4e86 SGDRegressor \u4e26\u5be6\u73fe\u4e86\u96a8\u6a5f\u68af\u5ea6\u4e0b\u964d\u5b78\u7fd2\u3002\u4f60\u53ef\u80fd\u6703\u554f\u68af\u5ea6\u4e0b\u964d\u8207\u96a8\u6a5f\u68af\u5ea6\u4e0b\u964d\u5169\u8005\u5dee\u5225\u5728\u54ea\uff1f\u7c21\u55ae\u4f86\u8aaa\u4e00\u822c\u7684\u68af\u5ea6\u4e0b\u964d\u6cd5\u662f\u4e00\u6b21\u7528\u5168\u90e8\u8a13\u7df4\u96c6\u7684\u6578\u64da\u8a08\u7b97\u640d\u5931\u51fd\u6578\u7684\u68af\u5ea6\uff0c\u7136\u5f8c\u505a\u4e00\u6b21\u53c3\u6578\u7684\u66f4\u65b0\u4fee\u6b63\u3002\u800c\u96a8\u6a5f\u68af\u5ea6\u4e0b\u964d\u6cd5\u5c31\u662f\u4e00\u6b21\u8dd1\u4e00\u500b\u6a23\u672c\u6216\u662f\u5c0f\u6279\u6b21\u6a23\u672c\uff0c\u7136\u5f8c\u7b97\u51fa\u4e00\u6b21\u68af\u5ea6\u4e26\u66f4\u65b0\u3002\u800c\u6240\u8b02\u7684\u96a8\u6a5f\u5c31\u662f\u5728\u8a13\u7df4\u904e\u7a0b\u4e2d\u96a8\u6a5f\u5730\u62bd\u53d6\u6a23\u672c\uff0c\u6240\u4ee5\u624d\u6703\u7a31\u70ba\u96a8\u6a5f\u68af\u5ea6\u4e0b\u964d\u6cd5\u3002 import numpy as np from sklearn.linear_model import SGDRegressor from sklearn.metrics import mean_squared_error # \u96a8\u6a5f\u7522\u751f\u4e00\u500b\u7279\u5fb5\u7684X\u8207\u8f38\u51fay X , y = make_data ( 100 ) # \u5efa\u7acb SGDRegressor \u4e26\u8a2d\u7f6e\u8d85\u53c3\u6578 regModel = SGDRegressor ( max_iter = 100 ) # \u8a13\u7df4\u6a21\u578b regModel . fit ( X , y ) # \u5efa\u7acb\u6e2c\u8a66\u8cc7\u6599 x_test = np . linspace ( - 0.05 , 1 , 500 )[:, None ] # \u9810\u6e2c\u6e2c\u8a66\u96c6 y_test = regModel . predict ( x_test ) # \u9810\u6e2c\u8a13\u7df4\u96c6 y_pred = regModel . predict ( X ) # \u8996\u89ba\u5316\u9810\u6e2c\u7d50\u679c plt . 
scatter ( X , y ) plt . plot ( x_test . ravel (), y_test , color = \"#d62728\" ) plt . xlabel ( 'x' ) plt . ylabel ( 'y' ) plt . text ( 0 , 10 , 'Loss(MSE)= %.3f ' % mean_squared_error ( y_pred , y ), fontdict = { 'size' : 15 , 'color' : 'red' }) plt . show () \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u4f7f\u7528 Sklearn SGDRegressor"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/","text":"[Day 9] \u908f\u8f2f\u8ff4\u6b78 (Logistic Regression) \u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19 \u8a8d\u8b58\u908f\u8f2f\u8ff4\u6b78 \u7dda\u6027\u5206\u985e\u5668 \u908f\u8f2f\u8ff4\u6b78\u5b78\u7fd2\u6a5f\u5236 \u6bd4\u8f03\u7dda\u6027\u8ff4\u6b78\u8207\u908f\u8f2f\u8ff4\u6b78 \u591a\u5143\u5206\u985e\u908f\u8f2f\u8ff4\u6b78 \u908f\u8f2f\u8ff4\u6b78\u7a0b\u5f0f\u624b\u628a\u624b \u4f7f\u7528\u908f\u8f2f\u8ff4\u6b78\u5efa\u7acb\u9cf6\u5c3e\u82b1\u6735\u5206\u985e\u5668 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a \u8a8d\u8b58\u908f\u8f2f\u8ff4\u6b78 \u908f\u8f2f\u8ff4\u6b78 (Logistic Regression) \u662f\u7531\u7dda\u6027\u8ff4\u6b78\u8b8a\u5316\u800c\u4f86\u7684\uff0c\u5b83\u662f\u4e00\u7a2e\u5206\u985e\u7684\u6a21\u578b\u3002\u5176\u76ee\u6a19\u662f\u8981\u627e\u51fa\u4e00\u689d\u76f4\u7dda\u80fd\u5920\u5c07\u6240\u6709\u6578\u64da\u6e05\u695a\u5730\u5206\u958b\u4e26\u505a\u5206\u985e\uff0c\u6211\u5011\u53c8\u53ef\u4ee5\u7a31\u8ff4\u6b78\u7684\u7dda\u6027\u5206\u985e\u5668\u3002\u908f\u8f2f\u8ff4\u6b78\u5176\u5be6\u662f\u5728\u8aaa\u660e\u4e00\u500b\u6a5f\u7387\u7684\u610f\u7fa9\uff0c\u900f\u904e\u4e00\u500b function \u53bb\u8a13\u7df4\u5f97\u5230\u7684\u4e00\u7d44\u53c3\u6578\uff0c\u4e0d\u540c\u7684 w,b \u5c31\u6703\u5f97\u5230\u4e0d\u540c\u7684 function\u3002\u65bc\u662f\u6211\u5011\u53ef\u4ee5\u8aaa f w,b (x) \u5373\u70ba posteriror probability\u3002 \u7dda\u6027\u8ff4\u6b78\u8207\u908f\u8f2f\u8ff4\u6b78 
\u908f\u8f2f\u8ff4\u6b78\u662f\u7528\u4f86\u8655\u7406\u5206\u985e\u554f\u984c\uff0c\u76ee\u6a19\u662f\u627e\u5230\u4e00\u689d\u76f4\u7dda\u53ef\u4ee5\u5c07\u8cc7\u6599\u505a\u5206\u985e\u3002\u4e3b\u8981\u662f\u5229\u7528 sigmoid function \u5c07\u8f38\u51fa\u8f49\u63db\u6210 0~1 \u7684\u503c\uff0c\u8868\u793a\u53ef\u80fd\u70ba\u9019\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002\u800c\u7dda\u6027\u8ff4\u6b78\u662f\u7528\u4f86\u9810\u6e2c\u4e00\u500b\u9023\u7e8c\u7684\u503c\uff0c\u76ee\u6a19\u662f\u60f3\u627e\u4e00\u689d\u76f4\u7dda\u53ef\u4ee5\u903c\u8fd1\u771f\u5be6\u7684\u8cc7\u6599\u3002 \u908f\u8f2f\u8ff4\u6b78\u5b78\u7fd2\u6a5f\u5236 \u908f\u8f2f\u8ff4\u6b78\u662f\u4e00\u500b\u6700\u57fa\u672c\u7684\u4e8c\u5143\u7dda\u6027\u5206\u985e\u5668\u3002\u6211\u5011\u8981\u627e\u4e00\u500b\u6a5f\u7387 (posterior probability) \u7576\u6a5f\u7387 P(C1|x) \u5927\u65bc 0.5 \u6642\u5247\u8f38\u51fa\u9810\u6e2c Class 1\uff0c\u53cd\u4e4b\u6a5f\u7387\u5c0f\u65bc 0.5 \u5247\u8f38\u51fa Class 2\u3002\u5982\u679c\u6211\u5011\u5047\u8a2d\u8cc7\u6599\u662f Gaussian \u6a5f\u7387\u5206\u4f48\uff0c\u6211\u5011\u53ef\u4ee5\u8aaa\u9019\u500b posterior probability \u5c31\u662f \ud835\udf0e(\ud835\udc67)\u3002\u5176\u4e2d z=w*x+b \uff0cx \u70ba\u8f38\u5165\u7279\u5fb5\uff0c\u800c w \u8207 b \u5206\u5225\u70ba\u6b0a\u91cd(weight)\u8207\u504f\u6b0a\u503c(bias) \u4ed6\u5011\u662f\u900f\u904e\u8a13\u7df4\u5f97\u5230\u7684\u4e00\u7d44\u53c3\u6578\u3002 \u4ee5\u4e0b\u5c31\u662f\u4e00\u500b\u908f\u8f2f\u8ff4\u6b78\u7684\u904b\u4f5c\u6a5f\u5236\uff0c\u5982\u679c\u4ee5\u5716\u50cf\u5316\u8868\u793a\u6703\u9577\u9019\u6a23\u3002\u6211\u5011\u7684 function \u6703\u6709\u5169\u7d44\u53c3\u6578\uff0c\u4e00\u7d44\u662f w \u6211\u5011\u7a31\u70ba weight\uff0c\u53e6\u4e00\u500b\u5e38\u6578 b \u7a31\u70ba bias\u3002\u5047\u8a2d\u6211\u5011\u6709\u5169\u500b\u8f38\u5165\u7279\u5fb5\uff0c\u4e26\u5c07\u9019\u5169\u500b\u8f38\u5165\u5206\u5225\u4e58\u4e0a w \u518d\u52a0\u4e0a b 
\u5c31\u53ef\u4ee5\u5f97\u5230 z\uff0c\u7136\u5f8c\u901a\u904e\u4e00\u500b sigmoid function \u5f97\u5230\u7684\u8f38\u51fa\u5c31\u662f posterior probability\u3002 \u5728\u908f\u8f2f\u8ff4\u6b78\u4e2d\u6211\u5011\u5b9a\u7fa9\u7684\u640d\u5931\u51fd\u6578\u662f\u8981\u53bb\u6700\u5c0f\u5316\u7684\u5c0d\u8c61\u662f\u6240\u6709\u8a13\u7df4\u8cc7\u6599 cross entropy \u7684\u7e3d\u548c\u3002\u6211\u5011\u5e0c\u671b\u6a21\u578b\u7684\u8f38\u51fa\u8981\u8ddf\u76ee\u6a19\u7b54\u6848\u8981\u8d8a\u63a5\u8fd1\u8d8a\u597d\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u5c07\u6700\u5c0f\u5316\u7684\u76ee\u6a19\u5beb\u6210\u4e00\u500b\u51fd\u6578\uff1a \u6700\u5f8c\u662f\u5c0b\u627e\u4e00\u7d44\u6700\u597d\u7684\u53c3\u6578\uff0c\u4f7f\u5f97 loss \u80fd\u5920\u6700\u4f4e\u3002\u56e0\u6b64\u9019\u88e1\u63a1\u7528\u68af\u5ea6\u4e0b\u964d (Gradient Descent) \u4f86\u6700\u5c0f\u5316\u4ea4\u53c9\u71b5 (Cross Entropy)\u3002\u6211\u5011\u5c07\u640d\u5931\u51fd\u6578\u5c0d\u6b0a\u91cd\u6c42\u504f\u5c0e\u5f8c\uff0c\u53ef\u4ee5\u5f97\u5230\u4e0b\u9762\u7684\u6b0a\u91cd\u66f4\u65b0\u7684\u5f0f\u5b50\uff1a \u591a\u5143\u5206\u985e\u908f\u8f2f\u8ff4\u6b78 (Multinomial Logistic Regression) \u5728 Sklearn \u4e2d\u4e5f\u80fd\u4f7f\u7528\u908f\u8f2f\u8ff4\u6b78\u5206\u985e\u5668\u61c9\u7528\u5728\u591a\u985e\u5225\u7684\u5206\u985e\u554f\u984c\u4e0a\uff0c\u5c0d\u65bc\u591a\u5143\u908f\u8f2f\u8ff4\u6b78\u6709 one-vs-rest(OvR) \u548c many-vs-many(MvM) \u5169\u7a2e\u65b9\u6cd5\u3002\u5169\u8005\u7684\u505a\u6cd5\u90fd\u662f\u5c07\u6240\u6709\u985e\u5225\u7684\u8cc7\u6599\u4f9d\u5e8f\u4f5c\u4e8c\u5143\u5206\u985e\u8a13\u7df4\u3002MvM \u76f8\u8f03\u65bc OvR \u6bd4\u8f03\u7cbe\u6e96\uff0c\u4f46 liblinear \u53ea\u652f\u63f4 OvR\u3002 one-vs-rest(OvR): \u8a13\u7df4\u6642\u628a\u67d0\u500b\u985e\u5225\u7684\u8cc7\u6599\u6b78\u70ba\u4e00\u985e\uff0c\u5176\u4ed6\u5269\u9918\u7684\u8cc7\u6599\u6b78\u70ba\u53e6\u4e00\u985e\u505a\u908f\u8f2f\u8ff4\u6b78\uff0c\u56e0\u6b64\u82e5\u6709 k 
\u500b\u985e\u5225\u7684\u8cc7\u6599\u6703\u6709 k \u500b\u4e8c\u5143\u5206\u985e\u5668\u3002\u5047\u5982\u6709\u4e09\u500b\u985e\u5225 A\u3001B\u3001C\uff0c\u9996\u5148\u62bd\u53d6 A \u985e\u5225\u7684\u8cc7\u6599\u505a\u70ba\u6b63\u96c6\uff0cB\u3001C \u985e\u5225\u8cc7\u6599\u505a\u70ba\u8ca0\u96c6; B \u985e\u5225\u7684\u8cc7\u6599\u4f5c\u70ba\u6b63\u96c6\uff0cA\u3001C \u985e\u5225\u985e\u5225\u8cc7\u6599\u505a\u70ba\u8ca0\u96c6; C \u985e\u5225\u7684\u8cc7\u6599\u4f5c\u70ba\u6b63\u96c6\uff0cA\u3001B \u985e\u5225\u985e\u5225\u8cc7\u6599\u505a\u70ba\u8ca0\u96c6\u3002\u900f\u904e\u9019\u4e09\u7d44\u8a13\u7df4\u96c6\u5206\u5225\u9032\u884c\u8a13\u7df4\uff0c\u7136\u5f8c\u7684\u5f97\u5230\u4e09\u500b\u5206\u985e\u5668 f1(x)\u3001f2(x)\u3001f3(x)\u3002\u9810\u6e2c\u7684\u6642\u5019\u5c31\u662f\u628a\u8cc7\u6599\u4e1f\u9032\u4e09\u500b\u5206\u985e\u5668\uff0c\u67e5\u770b\u54ea\u500b\u5206\u985e\u5668\u9810\u6e2c\u7684\u5206\u6578\u6700\u9ad8\u5c31\u6c7a\u5b9a\u8a72\u985e\u5225\u3002 many-vs-many(MvM): \u8207 OvR \u5dee\u5225\u5728\u65bc\u8a13\u7df4\u6642\u6bcf\u6b21\u53ea\u6703\u6311\u5169\u500b\u985e\u5225\u8a13\u7df4\u4e00\u500b\u5206\u985e\u5668\uff0c\u56e0\u6b64 k \u500b\u985e\u5225\u7684\u8cc7\u6599\u5c31\u9700\u8981 k(k-1)/2 \u500b\u4e8c\u5143\u5206\u985e\u5668\u3002\u5047\u5982\u6709\u4e09\u500b\u985e\u5225 A\u3001B\u3001C\uff0c\u56e0\u6b64\u6211\u5011\u6703\u6709\u4e09\u7d44\u4e8c\u5143\u5206\u985e\u5668\u5206\u5225\u6709 (A\u3001B)\u3001(A\u3001C) \u8207 (B\u3001C)\u3002\u8a13\u7df4\u5b8c\u6210\u5f8c\u7576\u6709\u65b0\u8cc7\u6599\u8981\u9810\u6e2c\u6642\uff0c\u628a\u8cc7\u6599\u5206\u5225\u5c0d\u4e09\u500b\u4e8c\u5143\u5206\u985e\u5668\u9032\u884c\u9810\u6e2c\uff0c\u6700\u7d42\u591a\u6578\u6c7a\u7684\u65b9\u5f0f\u5f97\u5230\u9810\u6e2c\u7d50\u679c\u3002 [\u7a0b\u5f0f\u5be6\u4f5c] \u908f\u8f2f\u8ff4\u6b78 (\u5206\u985e\u5668) 
\u908f\u8f2f\u8ff4\u6b78\u96d6\u7136\u6709\u8ff4\u6b78\u5169\u5b57\u4f46\u4ed6\u5176\u5be6\u662f\u88ab\u7528\u4f86\u505a\u5206\u985e\u7684\uff0c\u76ee\u7684\u662f\u8981\u627e\u51fa\u4e00\u689d\u76f4\u7dda\u80fd\u5920\u5c07\u5169\u500b\u985e\u5225\u5206\u958b\u3002\b\u672c\u7bc4\u4f8b\u63a1\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u505a\u5206\u985e\u5668\u5be6\u9a57\uff0c\u5e0c\u671b\u80fd\u5920\u900f\u904e\u7dda\u6027\u5206\u985e\u5668\u5c07\u4e09\u500b\u985e\u5225\u5f7c\u6b64\u5340\u9694\u958b\u3002 Parameters: - penalty: \u6b63\u898f\u5316l1/l2\uff0c\u9632\u6b62\u6a21\u578b\u904e\u5ea6\u64ec\u5408\u3002 - C: \u6578\u503c\u8d8a\u5927\u5c0d weight \u7684\u63a7\u5236\u529b\u8d8a\u5f31\uff0c\u9810\u8a2d\u70ba1\u3002 - n_init: \u9810\u8a2d\u70ba10\u6b21\u96a8\u6a5f\u521d\u59cb\u5316\uff0c\u9078\u64c7\u6548\u679c\u6700\u597d\u7684\u4e00\u7a2e\u4f86\u4f5c\u70ba\u6a21\u578b\u3002 - solver: \u512a\u5316\u5668\u7684\u9078\u64c7\u3002newton-cg,lbfgs,liblinear,sag,saga\u3002\u9810\u8a2d\u70baliblinear\u3002 - multi_class: \u9078\u64c7\u5206\u985e\u65b9\u5f0f\uff0covr\u5c31\u662fone-vs-rest(OvR)\uff0c\u800cmultinomial\u5c31\u662fmany-vs-many(MvM)\u3002\u9810\u8a2d\u70ba auto\uff0c\u6545\u6a21\u578b\u8a13\u7df4\u4e2d\u6703\u53d6\u4e00\u500b\u6700\u597d\u7684\u7d50\u679c\u3002 - max_iter: \u8fed\u4ee3\u6b21\u6578\uff0c\u9810\u8a2d\u70ba100\u4ee3\u3002 - class_weight: \u82e5\u9047\u8cc7\u6599\u4e0d\u5e73\u8861\u554f\u984c\u53ef\u4ee5\u8a2d\u5b9abalance\uff0c\u9810\u8a2d=None\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\u50c5\u5728solver=sag/liblinear\u6642\u6709\u7528\u3002 Attributes: - coef_: \u53d6\u5f97\u659c\u7387\u3002 - intercept_: \u53d6\u5f97\u622a\u8ddd\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: \u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 from 
sklearn.linear_model import LogisticRegression # \u5efa\u7acbLogistic\u6a21\u578b logisticModel = LogisticRegression ( random_state = 0 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b logisticModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = logisticModel . predict ( X_train ) \u4f7f\u7528Score\u8a55\u4f30\u6a21\u578b \u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb score() \u76f4\u63a5\u8a08\u7b97\u6a21\u578b\u9810\u6e2c\u7684\u6e96\u78ba\u7387\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( '\u8a13\u7df4\u96c6: ' , logisticModel . score ( X_train , y_train )) print ( '\u6e2c\u8a66\u96c6: ' , logisticModel . score ( X_test , y_test )) \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6: 0.9714285714285714 \u6e2c\u8a66\u96c6: 0.9333333333333333 \u900f\u904e Sklearn \u7684 LogisticRegression \u53ef\u4ee5\u5be6\u4f5c\u4e00\u500b\u5178\u578b\u7684\u4e8c\u5143\u5206\u985e\u5668\u3002\u4e0d\u904e\u7576\u6709\u591a\u500b\u985e\u5225\u7684\u6642\u5019\uff0c\u6211\u5011\u53ef\u4ee5\u900f\u904e\u53c3\u6578 multi_class \u4f86\u8a2d\u5b9a\u591a\u5143\u5206\u985e\u5668\u7684\u5b78\u7fd2\u6a5f\u5236\u3002\u6211\u5011\u53ef\u4ee5\u89c0\u5bdf\u4e00\u4e0b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5728\u6e2c\u8a66\u96c6\u4e0a\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u70ba\u4e86\u65b9\u4fbf\u89c0\u5bdf\u8a13\u7df4\u7d50\u679c\uff0c\u56e0\u6b64\u6211\u5011\u53ea\u6311\u9078\u5176\u4e2d\u5169\u500b\u7279\u5fb5\u4e26\u7e6a\u88fd\u5e73\u9762\u7684\u9ede\u6563\u5716\u3002\u4e0b\u5716\u4e2d\u5de6\u908a\u7684\u662f\u6e2c\u8a66\u96c6\u7684\u771f\u5be6\u5206\u985e\uff0c\u53f3\u908a\u7684\u662f\u6a21\u578b\u9810\u6e2c\u7684\u5206\u985e\u7d50\u679c\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"[Day 9] \u908f\u8f2f\u8ff4\u6b78 (Logistic 
Regression)"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/#day-9-logistic-regression","text":"","title":"[Day 9] \u908f\u8f2f\u8ff4\u6b78 (Logistic Regression)"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/#_1","text":"\u8a8d\u8b58\u908f\u8f2f\u8ff4\u6b78 \u7dda\u6027\u5206\u985e\u5668 \u908f\u8f2f\u8ff4\u6b78\u5b78\u7fd2\u6a5f\u5236 \u6bd4\u8f03\u7dda\u6027\u8ff4\u6b78\u8207\u908f\u8f2f\u8ff4\u6b78 \u591a\u5143\u5206\u985e\u908f\u8f2f\u8ff4\u6b78 \u908f\u8f2f\u8ff4\u6b78\u7a0b\u5f0f\u624b\u628a\u624b \u4f7f\u7528\u908f\u8f2f\u8ff4\u6b78\u5efa\u7acb\u9cf6\u5c3e\u82b1\u6735\u5206\u985e\u5668 \u7bc4\u4f8b\u7a0b\u5f0f\uff1a","title":"\u4eca\u65e5\u5b78\u7fd2\u76ee\u6a19"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/#_2","text":"\u908f\u8f2f\u8ff4\u6b78 (Logistic Regression) \u662f\u7531\u7dda\u6027\u8ff4\u6b78\u8b8a\u5316\u800c\u4f86\u7684\uff0c\u5b83\u662f\u4e00\u7a2e\u5206\u985e\u7684\u6a21\u578b\u3002\u5176\u76ee\u6a19\u662f\u8981\u627e\u51fa\u4e00\u689d\u76f4\u7dda\u80fd\u5920\u5c07\u6240\u6709\u6578\u64da\u6e05\u695a\u5730\u5206\u958b\u4e26\u505a\u5206\u985e\uff0c\u6211\u5011\u53c8\u53ef\u4ee5\u7a31\u8ff4\u6b78\u7684\u7dda\u6027\u5206\u985e\u5668\u3002\u908f\u8f2f\u8ff4\u6b78\u5176\u5be6\u662f\u5728\u8aaa\u660e\u4e00\u500b\u6a5f\u7387\u7684\u610f\u7fa9\uff0c\u900f\u904e\u4e00\u500b function \u53bb\u8a13\u7df4\u5f97\u5230\u7684\u4e00\u7d44\u53c3\u6578\uff0c\u4e0d\u540c\u7684 w,b \u5c31\u6703\u5f97\u5230\u4e0d\u540c\u7684 function\u3002\u65bc\u662f\u6211\u5011\u53ef\u4ee5\u8aaa f w,b (x) \u5373\u70ba posteriror probability\u3002","title":"\u8a8d\u8b58\u908f\u8f2f\u8ff4\u6b78"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/#_3","text":"\u908f\u8f2f\u8ff4\u6b78\u662f\u7528\u4f86\u8655\u7406\u5206\u985e\u554f\u984c\uff0c\u76ee\u6a19\u662f\u627e\u5230\u4e00\u689d\u76f4\u7dda\u53ef\u4ee5\u5c07\u8cc7\u6599\u505a\u5206\u985e\u3002\u4e3b\u8981\u662f\u5229\u7528 sigmoid function \u5c07\u8f38\u51fa\u8f49\u63db\u6210 0~1 
\u7684\u503c\uff0c\u8868\u793a\u53ef\u80fd\u70ba\u9019\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002\u800c\u7dda\u6027\u8ff4\u6b78\u662f\u7528\u4f86\u9810\u6e2c\u4e00\u500b\u9023\u7e8c\u7684\u503c\uff0c\u76ee\u6a19\u662f\u60f3\u627e\u4e00\u689d\u76f4\u7dda\u53ef\u4ee5\u903c\u8fd1\u771f\u5be6\u7684\u8cc7\u6599\u3002","title":"\u7dda\u6027\u8ff4\u6b78\u8207\u908f\u8f2f\u8ff4\u6b78"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/#_4","text":"\u908f\u8f2f\u8ff4\u6b78\u662f\u4e00\u500b\u6700\u57fa\u672c\u7684\u4e8c\u5143\u7dda\u6027\u5206\u985e\u5668\u3002\u6211\u5011\u8981\u627e\u4e00\u500b\u6a5f\u7387 (posterior probability) \u7576\u6a5f\u7387 P(C1|x) \u5927\u65bc 0.5 \u6642\u5247\u8f38\u51fa\u9810\u6e2c Class 1\uff0c\u53cd\u4e4b\u6a5f\u7387\u5c0f\u65bc 0.5 \u5247\u8f38\u51fa Class 2\u3002\u5982\u679c\u6211\u5011\u5047\u8a2d\u8cc7\u6599\u662f Gaussian \u6a5f\u7387\u5206\u4f48\uff0c\u6211\u5011\u53ef\u4ee5\u8aaa\u9019\u500b posterior probability \u5c31\u662f \ud835\udf0e(\ud835\udc67)\u3002\u5176\u4e2d z=w*x+b \uff0cx \u70ba\u8f38\u5165\u7279\u5fb5\uff0c\u800c w \u8207 b \u5206\u5225\u70ba\u6b0a\u91cd(weight)\u8207\u504f\u6b0a\u503c(bias) \u4ed6\u5011\u662f\u900f\u904e\u8a13\u7df4\u5f97\u5230\u7684\u4e00\u7d44\u53c3\u6578\u3002 \u4ee5\u4e0b\u5c31\u662f\u4e00\u500b\u908f\u8f2f\u8ff4\u6b78\u7684\u904b\u4f5c\u6a5f\u5236\uff0c\u5982\u679c\u4ee5\u5716\u50cf\u5316\u8868\u793a\u6703\u9577\u9019\u6a23\u3002\u6211\u5011\u7684 function \u6703\u6709\u5169\u7d44\u53c3\u6578\uff0c\u4e00\u7d44\u662f w \u6211\u5011\u7a31\u70ba weight\uff0c\u53e6\u4e00\u500b\u5e38\u6578 b \u7a31\u70ba bias\u3002\u5047\u8a2d\u6211\u5011\u6709\u5169\u500b\u8f38\u5165\u7279\u5fb5\uff0c\u4e26\u5c07\u9019\u5169\u500b\u8f38\u5165\u5206\u5225\u4e58\u4e0a w \u518d\u52a0\u4e0a b \u5c31\u53ef\u4ee5\u5f97\u5230 z\uff0c\u7136\u5f8c\u901a\u904e\u4e00\u500b sigmoid function \u5f97\u5230\u7684\u8f38\u51fa\u5c31\u662f posterior probability\u3002 
\u5728\u908f\u8f2f\u8ff4\u6b78\u4e2d\u6211\u5011\u5b9a\u7fa9\u7684\u640d\u5931\u51fd\u6578\u662f\u8981\u53bb\u6700\u5c0f\u5316\u7684\u5c0d\u8c61\u662f\u6240\u6709\u8a13\u7df4\u8cc7\u6599 cross entropy \u7684\u7e3d\u548c\u3002\u6211\u5011\u5e0c\u671b\u6a21\u578b\u7684\u8f38\u51fa\u8981\u8ddf\u76ee\u6a19\u7b54\u6848\u8981\u8d8a\u63a5\u8fd1\u8d8a\u597d\u3002\u56e0\u6b64\u6211\u5011\u53ef\u4ee5\u5c07\u6700\u5c0f\u5316\u7684\u76ee\u6a19\u5beb\u6210\u4e00\u500b\u51fd\u6578\uff1a \u6700\u5f8c\u662f\u5c0b\u627e\u4e00\u7d44\u6700\u597d\u7684\u53c3\u6578\uff0c\u4f7f\u5f97 loss \u80fd\u5920\u6700\u4f4e\u3002\u56e0\u6b64\u9019\u88e1\u63a1\u7528\u68af\u5ea6\u4e0b\u964d (Gradient Descent) \u4f86\u6700\u5c0f\u5316\u4ea4\u53c9\u71b5 (Cross Entropy)\u3002\u6211\u5011\u5c07\u640d\u5931\u51fd\u6578\u5c0d\u6b0a\u91cd\u6c42\u504f\u5c0e\u5f8c\uff0c\u53ef\u4ee5\u5f97\u5230\u4e0b\u9762\u7684\u6b0a\u91cd\u66f4\u65b0\u7684\u5f0f\u5b50\uff1a","title":"\u908f\u8f2f\u8ff4\u6b78\u5b78\u7fd2\u6a5f\u5236"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/#multinomial-logistic-regression","text":"\u5728 Sklearn \u4e2d\u4e5f\u80fd\u4f7f\u7528\u908f\u8f2f\u8ff4\u6b78\u5206\u985e\u5668\u61c9\u7528\u5728\u591a\u985e\u5225\u7684\u5206\u985e\u554f\u984c\u4e0a\uff0c\u5c0d\u65bc\u591a\u5143\u908f\u8f2f\u8ff4\u6b78\u6709 one-vs-rest(OvR) \u548c many-vs-many(MvM) \u5169\u7a2e\u65b9\u6cd5\u3002\u5169\u8005\u7684\u505a\u6cd5\u90fd\u662f\u5c07\u6240\u6709\u985e\u5225\u7684\u8cc7\u6599\u4f9d\u5e8f\u4f5c\u4e8c\u5143\u5206\u985e\u8a13\u7df4\u3002MvM \u76f8\u8f03\u65bc OvR \u6bd4\u8f03\u7cbe\u6e96\uff0c\u4f46 liblinear \u53ea\u652f\u63f4 OvR\u3002 one-vs-rest(OvR): \u8a13\u7df4\u6642\u628a\u67d0\u500b\u985e\u5225\u7684\u8cc7\u6599\u6b78\u70ba\u4e00\u985e\uff0c\u5176\u4ed6\u5269\u9918\u7684\u8cc7\u6599\u6b78\u70ba\u53e6\u4e00\u985e\u505a\u908f\u8f2f\u8ff4\u6b78\uff0c\u56e0\u6b64\u82e5\u6709 k \u500b\u985e\u5225\u7684\u8cc7\u6599\u6703\u6709 k 
\u500b\u4e8c\u5143\u5206\u985e\u5668\u3002\u5047\u5982\u6709\u4e09\u500b\u985e\u5225 A\u3001B\u3001C\uff0c\u9996\u5148\u62bd\u53d6 A \u985e\u5225\u7684\u8cc7\u6599\u505a\u70ba\u6b63\u96c6\uff0cB\u3001C \u985e\u5225\u8cc7\u6599\u505a\u70ba\u8ca0\u96c6; B \u985e\u5225\u7684\u8cc7\u6599\u4f5c\u70ba\u6b63\u96c6\uff0cA\u3001C \u985e\u5225\u985e\u5225\u8cc7\u6599\u505a\u70ba\u8ca0\u96c6; C \u985e\u5225\u7684\u8cc7\u6599\u4f5c\u70ba\u6b63\u96c6\uff0cA\u3001B \u985e\u5225\u985e\u5225\u8cc7\u6599\u505a\u70ba\u8ca0\u96c6\u3002\u900f\u904e\u9019\u4e09\u7d44\u8a13\u7df4\u96c6\u5206\u5225\u9032\u884c\u8a13\u7df4\uff0c\u7136\u5f8c\u7684\u5f97\u5230\u4e09\u500b\u5206\u985e\u5668 f1(x)\u3001f2(x)\u3001f3(x)\u3002\u9810\u6e2c\u7684\u6642\u5019\u5c31\u662f\u628a\u8cc7\u6599\u4e1f\u9032\u4e09\u500b\u5206\u985e\u5668\uff0c\u67e5\u770b\u54ea\u500b\u5206\u985e\u5668\u9810\u6e2c\u7684\u5206\u6578\u6700\u9ad8\u5c31\u6c7a\u5b9a\u8a72\u985e\u5225\u3002 many-vs-many(MvM): \u8207 OvR \u5dee\u5225\u5728\u65bc\u8a13\u7df4\u6642\u6bcf\u6b21\u53ea\u6703\u6311\u5169\u500b\u985e\u5225\u8a13\u7df4\u4e00\u500b\u5206\u985e\u5668\uff0c\u56e0\u6b64 k \u500b\u985e\u5225\u7684\u8cc7\u6599\u5c31\u9700\u8981 k(k-1)/2 \u500b\u4e8c\u5143\u5206\u985e\u5668\u3002\u5047\u5982\u6709\u4e09\u500b\u985e\u5225 A\u3001B\u3001C\uff0c\u56e0\u6b64\u6211\u5011\u6703\u6709\u4e09\u7d44\u4e8c\u5143\u5206\u985e\u5668\u5206\u5225\u6709 (A\u3001B)\u3001(A\u3001C) \u8207 (B\u3001C)\u3002\u8a13\u7df4\u5b8c\u6210\u5f8c\u7576\u6709\u65b0\u8cc7\u6599\u8981\u9810\u6e2c\u6642\uff0c\u628a\u8cc7\u6599\u5206\u5225\u5c0d\u4e09\u500b\u4e8c\u5143\u5206\u985e\u5668\u9032\u884c\u9810\u6e2c\uff0c\u6700\u7d42\u591a\u6578\u6c7a\u7684\u65b9\u5f0f\u5f97\u5230\u9810\u6e2c\u7d50\u679c\u3002","title":"\u591a\u5143\u5206\u985e\u908f\u8f2f\u8ff4\u6b78 (Multinomial Logistic 
Regression)"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/#_5","text":"","title":"[\u7a0b\u5f0f\u5be6\u4f5c]"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/#_6","text":"\u908f\u8f2f\u8ff4\u6b78\u96d6\u7136\u6709\u8ff4\u6b78\u5169\u5b57\u4f46\u4ed6\u5176\u5be6\u662f\u88ab\u7528\u4f86\u505a\u5206\u985e\u7684\uff0c\u76ee\u7684\u662f\u8981\u627e\u51fa\u4e00\u689d\u76f4\u7dda\u80fd\u5920\u5c07\u5169\u500b\u985e\u5225\u5206\u958b\u3002\b\u672c\u7bc4\u4f8b\u63a1\u7528\u9cf6\u5c3e\u82b1\u6735\u8cc7\u6599\u96c6\u505a\u5206\u985e\u5668\u5be6\u9a57\uff0c\u5e0c\u671b\u80fd\u5920\u900f\u904e\u7dda\u6027\u5206\u985e\u5668\u5c07\u4e09\u500b\u985e\u5225\u5f7c\u6b64\u5340\u9694\u958b\u3002 Parameters: - penalty: \u6b63\u898f\u5316l1/l2\uff0c\u9632\u6b62\u6a21\u578b\u904e\u5ea6\u64ec\u5408\u3002 - C: \u6578\u503c\u8d8a\u5927\u5c0d weight \u7684\u63a7\u5236\u529b\u8d8a\u5f31\uff0c\u9810\u8a2d\u70ba1\u3002 - n_init: \u9810\u8a2d\u70ba10\u6b21\u96a8\u6a5f\u521d\u59cb\u5316\uff0c\u9078\u64c7\u6548\u679c\u6700\u597d\u7684\u4e00\u7a2e\u4f86\u4f5c\u70ba\u6a21\u578b\u3002 - solver: \u512a\u5316\u5668\u7684\u9078\u64c7\u3002newton-cg,lbfgs,liblinear,sag,saga\u3002\u9810\u8a2d\u70baliblinear\u3002 - multi_class: \u9078\u64c7\u5206\u985e\u65b9\u5f0f\uff0covr\u5c31\u662fone-vs-rest(OvR)\uff0c\u800cmultinomial\u5c31\u662fmany-vs-many(MvM)\u3002\u9810\u8a2d\u70ba auto\uff0c\u6545\u6a21\u578b\u8a13\u7df4\u4e2d\u6703\u53d6\u4e00\u500b\u6700\u597d\u7684\u7d50\u679c\u3002 - max_iter: \u8fed\u4ee3\u6b21\u6578\uff0c\u9810\u8a2d\u70ba100\u4ee3\u3002 - class_weight: \u82e5\u9047\u8cc7\u6599\u4e0d\u5e73\u8861\u554f\u984c\u53ef\u4ee5\u8a2d\u5b9abalance\uff0c\u9810\u8a2d=None\u3002 - random_state: \u4e82\u6578\u7a2e\u5b50\u50c5\u5728solver=sag/liblinear\u6642\u6709\u7528\u3002 Attributes: - coef_: \u53d6\u5f97\u659c\u7387\u3002 - intercept_: \u53d6\u5f97\u622a\u8ddd\u3002 Methods: - fit: \u653e\u5165X\u3001y\u9032\u884c\u6a21\u578b\u64ec\u5408\u3002 - predict: 
\u9810\u6e2c\u4e26\u56de\u50b3\u9810\u6e2c\u985e\u5225\u3002 - predict_proba: \u9810\u6e2c\u6bcf\u500b\u985e\u5225\u7684\u6a5f\u7387\u503c\u3002 - score: \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b\u3002 from sklearn.linear_model import LogisticRegression # \u5efa\u7acbLogistic\u6a21\u578b logisticModel = LogisticRegression ( random_state = 0 ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u8a13\u7df4\u6a21\u578b logisticModel . fit ( X_train , y_train ) # \u4f7f\u7528\u8a13\u7df4\u8cc7\u6599\u9810\u6e2c\u5206\u985e predicted = logisticModel . predict ( X_train )","title":"\u908f\u8f2f\u8ff4\u6b78 (\u5206\u985e\u5668)"},{"location":"9.\u908f\u8f2f\u8ff4\u6b78/#score","text":"\u6211\u5011\u53ef\u4ee5\u76f4\u63a5\u547c\u53eb score() \u76f4\u63a5\u8a08\u7b97\u6a21\u578b\u9810\u6e2c\u7684\u6e96\u78ba\u7387\u3002 # \u9810\u6e2c\u6210\u529f\u7684\u6bd4\u4f8b print ( '\u8a13\u7df4\u96c6: ' , logisticModel . score ( X_train , y_train )) print ( '\u6e2c\u8a66\u96c6: ' , logisticModel . score ( X_test , y_test )) \u8f38\u51fa\u7d50\u679c\uff1a \u8a13\u7df4\u96c6: 0.9714285714285714 \u6e2c\u8a66\u96c6: 0.9333333333333333 \u900f\u904e Sklearn \u7684 LogisticRegression \u53ef\u4ee5\u5be6\u4f5c\u4e00\u500b\u5178\u578b\u7684\u4e8c\u5143\u5206\u985e\u5668\u3002\u4e0d\u904e\u7576\u6709\u591a\u500b\u985e\u5225\u7684\u6642\u5019\uff0c\u6211\u5011\u53ef\u4ee5\u900f\u904e\u53c3\u6578 multi_class 
\u4f86\u8a2d\u5b9a\u591a\u5143\u5206\u985e\u5668\u7684\u5b78\u7fd2\u6a5f\u5236\u3002\u6211\u5011\u53ef\u4ee5\u89c0\u5bdf\u4e00\u4e0b\u8a13\u7df4\u597d\u7684\u6a21\u578b\u5728\u6e2c\u8a66\u96c6\u4e0a\u7684\u9810\u6e2c\u80fd\u529b\uff0c\u70ba\u4e86\u65b9\u4fbf\u89c0\u5bdf\u8a13\u7df4\u7d50\u679c\uff0c\u56e0\u6b64\u6211\u5011\u53ea\u6311\u9078\u5176\u4e2d\u5169\u500b\u7279\u5fb5\u4e26\u7e6a\u88fd\u5e73\u9762\u7684\u9ede\u6563\u5716\u3002\u4e0b\u5716\u4e2d\u5de6\u908a\u7684\u662f\u6e2c\u8a66\u96c6\u7684\u771f\u5be6\u5206\u985e\uff0c\u53f3\u908a\u7684\u662f\u6a21\u578b\u9810\u6e2c\u7684\u5206\u985e\u7d50\u679c\u3002 \u672c\u7cfb\u5217\u6559\u5b78\u5167\u5bb9\u53ca\u7bc4\u4f8b\u7a0b\u5f0f\u90fd\u53ef\u4ee5\u5f9e\u6211\u7684 GitHub \u53d6\u5f97\uff01","title":"\u4f7f\u7528Score\u8a55\u4f30\u6a21\u578b"}]} \ No newline at end of file diff --git a/sitemap.xml b/sitemap.xml index f1a1645..bc55294 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,152 +2,152 @@ None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily 
None - 2024-09-16 + 2024-10-08 daily None - 2024-09-16 + 2024-10-08 daily \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index 4d355d2..f319720 100644 Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ