diff --git a/Exp3_SMS_Spam_Recognition/datasets/5f9ae242cae5285cd734b91e-momodel/scu_stopwords.txt b/Exp3_SMS_Spam_Recognition/datasets/5f9ae242cae5285cd734b91e-momodel/scu_stopwords.txt
deleted file mode 100644
index 78abc66..0000000
--- a/Exp3_SMS_Spam_Recognition/datasets/5f9ae242cae5285cd734b91e-momodel/scu_stopwords.txt
+++ /dev/null
@@ -1,976 +0,0 @@
-打开天窗说亮话
-到目前为止
-赶早不赶晚
-常言说得好
-何乐而不为
-毫无保留地
-由此可见
-这就是说
-这么点儿
-综上所述
-总的来看
-总的来说
-总的说来
-总而言之
-相对而言
-除此之外
-反过来说
-恰恰相反
-如上所述
-换句话说
-具体地说
-具体说来
-另一方面
-与此同时
-一则通过
-毫无例外
-不然的话
-从此以后
-从古到今
-从古至今
-从今以后
-大张旗鼓
-从无到有
-从早到晚
-弹指之间
-不亦乐乎
-不知不觉
-不止一次
-不择手段
-不可开交
-不可抗拒
-不仅仅是
-不管怎样
-挨家挨户
-长此下去
-长话短说
-除此而外
-除此以外
-除此之外
-得天独厚
-川流不息
-长期以来
-挨门挨户
-挨门逐户
-多多少少
-多多益善
-二话不说
-更进一步
-二话没说
-分期分批
-风雨无阻
-归根到底
-归根结底
-反之亦然
-大面儿上
-倒不如说
-成年累月
-换句话说
-或多或少
-简而言之
-接连不断
-尽如人意
-尽心竭力
-尽心尽力
-尽管如此
-据我所知
-具体地说
-具体来说
-具体说来
-近几年来
-每时每刻
-屡次三番
-三番两次
-三番五次
-三天两头
-另一方面
-老老实实
-年复一年
-恰恰相反
-顷刻之间
-穷年累月
-千万千万
-日复一日
-如此等等
-如前所述
-如上所述
-一方面
-切不可
-顷刻间
-全身心
-另方面
-另一个
-猛然间
-默默地
-就是说
-近年来
-尽可能
-接下来
-简言之
-急匆匆
-即是说
-基本上
-换言之
-充其极
-充其量
-暗地里
-反之则
-比如说
-背地里
-背靠背
-并没有
-不得不
-不得了
-不得已
-不仅仅
-不经意
-不能不
-不外乎
-不由得
-不怎么
-不至于
-策略地
-差不多
-常言道
-常言说
-多年来
-多年前
-差一点
-敞开儿
-抽冷子
-大不了
-反倒是
-反过来
-大体上
-当口儿
-倒不如
-怪不得
-动不动
-看起来
-看上去
-看样子
-够瞧的
-到了儿
-呆呆地
-来不及
-来得及
-到头来
-连日来
-于是乎
-为什么
-这会儿
-换言之
-那会儿
-那么些
-那么样
-什么样
-反过来
-紧接着
-就是说
-要不然
-要不是
-一方面
-以至于
-自个儿
-自各儿
-之所以
-这么些
-这么样
-怎么办
-怎么样
-谁知
-顺着
-似的
-虽然
-虽说
-虽则
-随着
-所以
-他们
-他人
-它们
-她们
-倘或
-倘然
-倘若
-倘使
-要么
-要是
-也罢
-也好
-以便
-依照
-以及
-以免
-以至
-以致
-抑或
-因此
-因而
-因为
-由于
-有的
-有关
-有些
-于是
-与否
-与其
-越是
-云云
-一般
-一旦
-一来
-一切
-一样
-同时
-万一
-为何
-为了
-为着
-嗡嗡
-我们
-呜呼
-乌乎
-无论
-无宁
-沿着
-毋宁
-向着
-照着
-怎么
-咱们
-在下
-再说
-再者
-怎样
-这边
-这儿
-这个
-这里
-这么
-这时
-这些
-这样
-正如
-之类
-之一
-只是
-只限
-只要
-只有
-至于
-诸位
-着呢
-纵令
-纵然
-纵使
-遵照
-作为
-喔唷
-自从
-自己
-自家
-自身
-总之
-要不
-哎呀
-哎哟
-俺们
-按照
-吧哒
-罢了
-本着
-比方
-比如
-鄙人
-彼此
-别的
-别说
-并且
-不比
-不成
-不单
-不但
-不独
-不管
-不光
-不过
-不仅
-不拘
-不论
-不怕
-不然
-不如
-不特
-不惟
-不问
-不只
-朝着
-趁着
-除非
-除了
-此间
-此外
-从而
-但是
-当着
-的话
-等等
-叮咚
-对于
-多少
-而况
-而且
-而是
-而外
-而言
-而已
-尔后
-反之
-非但
-非徒
-否则
-嘎登
-各个
-各位
-各种
-各自
-根据
-故此
-固然
-关于
-果然
-果真
-哈哈
-何处
-何况
-何时
-哼唷
-呼哧
-还是
-还有
-或是
-或者
-极了
-及其
-及至
-即便
-即或
-即令
-即若
-即使
-既然
-既是
-继而
-加之
-假如
-假若
-假使
-鉴于
-几时
-较之
-接着
-结果
-进而
-尽管
-经过
-就是
-可见
-可是
-可以
-况且
-开始
-开外
-来着
-例如
-连同
-两者
-另外
-慢说
-漫说
-每当
-莫若
-某个
-某些
-哪边
-哪儿
-哪个
-哪里
-哪年
-哪怕
-哪天
-哪些
-哪样
-那边
-那儿
-那个
-那里
-那么
-那时
-那些
-那样
-乃至
-宁可
-宁肯
-宁愿
-你们
-啪达
-旁人
-凭借
-其次
-其二
-其他
-其它
-其一
-其余
-其中
-起见
-起见
-岂但
-前后
-前者
-然而
-然后
-然则
-人家
-任何
-任凭
-如此
-如果
-如何
-如其
-如若
-若非
-若是
-上下
-尚且
-设若
-设使
-甚而
-甚么
-甚至
-省得
-时候
-什么
-使得
-是的
-首先
-首先
-其次
-再次
-最后
-您们
-它们
-她们
-他们
-我们
-你是
-您是
-我是
-他是
-她是
-它是
-不是
-你们
-啊哈
-啊呀
-啊哟
-挨次
-挨个
-挨着
-哎呀
-哎哟
-俺们
-按理
-按期
-默然
-按时
-按说
-按照
-暗中
-暗自
-昂然
-八成
-倍感
-倍加
-本人
-本身
-本着
-并非
-别人
-必定
-比起
-比如
-比照
-鄙人
-毕竟
-必将
-必须
-并肩
-并没
-并排
-并且
-并无
-勃然
-不必
-不常
-不大
-不单
-不但
-而且
-不得
-不迭
-不定
-不独
-不对
-不妨
-不管
-不光
-不过
-不会
-不仅
-不拘
-不力
-不了
-不料
-不论
-不满
-不免
-不起
-不巧
-不然
-不日
-不少
-不胜
-不时
-不是
-不同
-不能
-不要
-不外
-不下
-不限
-不消
-不已
-不再
-不曾
-不止
-不只
-才能
-彻夜
-趁便
-趁机
-趁热
-趁势
-趁早
-趁着
-成心
-乘机
-乘势
-乘隙
-乘虚
-诚然
-迟早
-充分
-出来
-出去
-除此
-除非
-除开
-除了
-除去
-除却
-除外
-处处
-传说
-传闻
-纯粹
-此后
-此间
-此外
-此中
-次第
-匆匆
-从不
-从此
-从而
-从宽
-从来
-从轻
-从速
-从头
-从未
-从小
-从新
-从严
-从优
-从中
-从重
-凑巧
-存心
-达旦
-打从
-大大
-大抵
-大都
-大多
-大凡
-大概
-大家
-大举
-大略
-大约
-大致
-待到
-单纯
-单单
-但是
-但愿
-当场
-当儿
-当即
-当然
-当庭
-当头
-当下
-当真
-当中
-当着
-倒是
-到处
-到底
-到头
-得起
-的话
-的确
-等到
-等等
-顶多
-动辄
-陡然
-独自
-断然
-对于
-顿时
-多次
-多多
-多亏
-而后
-而论
-而且
-而是
-而外
-而言
-而已
-而又
-尔等
-反倒
-反而
-反手
-反之
-方才
-方能
-非常
-非但
-非得
-分头
-奋勇
-愤然
-更为
-更加
-根据
-个人
-各式
-刚才
-敢情
-该当
-嘎嘎
-否则
-赶快
-敢于
-刚好
-刚巧
-高低
-格外
-隔日
-隔夜
-公然
-过于
-果然
-果真
-光是
-关于
-共总
-姑且
-故此
-故而
-故意
-固然
-惯常
-毫不
-毫无
-很多
-何须
-好在
-何必
-何尝
-何妨
-何苦
-何况
-何止
-很少
-轰然
-后来
-呼啦
-哗啦
-互相
-忽地
-忽然
-话说
-或是
-伙同
-豁然
-恍然
-还是
-或许
-或者
-基本
-基于
-极大
-极度
-极端
-极力
-极其
-极为
-即便
-即将
-及其
-及至
-即刻
-即令
-即使
-几度
-几番
-几乎
-几经
-既然
-继而
-继之
-加上
-加以
-加之
-假如
-假若
-假使
-间或
-将才
-简直
-鉴于
-将近
-将要
-交口
-较比
-较为
-较之
-皆可
-截然
-截至
-藉以
-借此
-借以
-届时
-尽快
-近来
-进而
-进来
-进去
-尽管
-尽量
-尽然
-就算
-居然
-就此
-就地
-竟然
-究竟
-经常
-尽早
-精光
-经过
-就是
-局外
-举凡
-据称
-据此
-据实
-据说
-可好
-看来
-开外
-绝不
-决不
-据悉
-决非
-绝顶
-绝对
-绝非
-可见
-可能
-可是
-可以
-恐怕
-来讲
-来看
-快要
-况且
-拦腰
-牢牢
-老是
-累次
-累年
-理当
-理该
-理应
-例如
-立地
-立刻
-立马
-立时
-联袂
-连连
-连日
-路经
-临到
-连声
-连同
-连袂
-另外
-另行
-屡次
-屡屡
-缕缕
-率尔
-率然
-略加
-略微
-略为
-论说
-马上
-猛然
-没有
-每当
-每逢
-每每
-莫不
-莫非
-莫如
-莫若
-哪怕
-那么
-那末
-那些
-乃至
-难道
-难得
-难怪
-难说
-你们
-凝神
-宁可
-宁肯
-宁愿
-偶而
-偶尔
-碰巧
-譬如
-偏偏
-平素
-迫于
-扑通
-其次
-其后
-其实
-其它
-起初
-起来
-起首
-起头
-起先
-岂但
-岂非
-岂止
-恰逢
-恰好
-恰恰
-恰巧
-恰如
-恰似
-前后
-前者
-切莫
-切切
-切勿
-亲口
-亲身
-亲手
-亲眼
-亲自
-顷刻
-请勿
-取道
-权时
-全都
-全力
-全年
-全然
-然而
-然后
-人家
-人人
-仍旧
-仍然
-日见
-日渐
-日益
-日臻
-如常
-如次
-如果
-如今
-如期
-如若
-如上
-如下
-上来
-上去
-瑟瑟
-沙沙
-啊
-哎
-唉
-俺
-按
-吧
-把
-甭
-别
-嘿
-很
-乎
-会
-或
-既
-及
-啦
-了
-们
-你
-您
-哦
-砰
-啊
-你
-我
-他
-她
-它
\ No newline at end of file
diff --git a/Exp3_SMS_Spam_Recognition/main.ipynb b/Exp3_SMS_Spam_Recognition/main.ipynb
index bb12ad7..37a1717 100644
--- a/Exp3_SMS_Spam_Recognition/main.ipynb
+++ b/Exp3_SMS_Spam_Recognition/main.ipynb
@@ -7,6 +7,27 @@
     "# 自然语言处理领域 - 垃圾短信识别"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# Load the raw tab-separated, header-less SMS dump as columns id / label / text\n",
+    "df = pd.read_csv('80w.txt', sep='\\t', header=None, names=['id', 'label', 'text'])\n",
+    "\n",
+    "# Character-level split (not word segmentation); empty/NaN or non-string text must not crash\n",
+    "df['words'] = df['text'].fillna('').astype(str).apply(lambda s: ' '.join(s))\n",
+    "\n",
+    "# Drop the id column and fix the column order of the exported table\n",
+    "df = df[['label', 'text', 'words']]\n",
+    "\n",
+    "# Write the result as CSV without the pandas index column\n",
+    "df.to_csv('output.csv', index=False)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -92,7 +113,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -107,7 +128,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
@@ -187,7 +208,7 @@
        "4                            23 年 从 盐城 拉回来 的 麻麻 的 嫁妆  "
       ]
      },
-     "execution_count": 6,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -195,6 +216,7 @@
    "source": [
     "# 数据集的路径\n",
     "data_path = \"./datasets/5f9ae242cae5285cd734b91e-momodel/sms_pub.csv\"\n",
+    "\n",
     "# 读取数据\n",
     "sms = pd.read_csv(data_path, encoding='utf-8')\n",
     "# 显示前 5 条数据\n",
@@ -203,7 +225,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
@@ -298,7 +320,7 @@
        "1       79146   79146  南口 阿玛施 新春 第一批 限量 春装 到 店 啦                  ...    1  "
       ]
      },
-     "execution_count": 4,
+     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -333,7 +355,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -351,7 +373,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
@@ -389,16 +411,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "array(['cab', 'call', 'me', 'please', 'tonight', 'you'], dtype=object)"
+       "['cab', 'call', 'me', 'please', 'tonight', 'you']"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 22,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -415,13 +437,12 @@
     "vect.fit(simple_train)\n",
     "\n",
     "# 查看学习到的词汇表\n",
-    "# vect.get_feature_names()\n",
-    "vect.get_feature_names_out()"
+    "vect.get_feature_names()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
@@ -431,7 +452,7 @@
        "\twith 9 stored elements in Compressed Sparse Row format>"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 23,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -445,7 +466,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [
     {
@@ -456,7 +477,7 @@
        "       [0, 1, 1, 2, 0, 0]])"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 24,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -468,7 +489,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [
     {
@@ -539,7 +560,7 @@
        "2    0     1   1       2        0    0"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 25,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -591,7 +612,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
@@ -662,7 +683,7 @@
        "2  0.000000  0.266075  0.342620  0.901008  0.000000  0.000000"
       ]
      },
-     "execution_count": 13,
+     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -676,7 +697,7 @@
     "# 将稀疏矩阵转为一般矩阵\n",
     "simple_train_dtm.toarray()\n",
     "# 结合词汇表和转为得到的矩阵来直观查看内容\n",
-    "pd.DataFrame(simple_train_dtm.toarray(), columns=tfidf.get_feature_names_out())"
+    "pd.DataFrame(simple_train_dtm.toarray(), columns=tfidf.get_feature_names())"
    ]
   },
   {
@@ -711,7 +732,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 73,
    "metadata": {},
    "outputs": [
     {
@@ -719,8 +740,8 @@
      "output_type": "stream",
      "text": [
       "总共的数据大小 (786610,)\n",
-      "训练集数据大小 (707949,)\n",
-      "测试集数据大小 (78661,)\n"
+      "训练集数据大小 (629288,)\n",
+      "测试集数据大小 (157322,)\n"
      ]
     }
    ],
@@ -729,7 +750,7 @@
     "from sklearn.model_selection import train_test_split\n",
     "X = np.array(sms.msg_new)\n",
     "y = np.array(sms.label)\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.1)\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)\n",
     "print(\"总共的数据大小\", X.shape)\n",
     "print(\"训练集数据大小\", X_train.shape)\n",
     "print(\"测试集数据大小\", X_test.shape)"
@@ -744,7 +765,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -809,27 +830,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "CPU times: user 164 ms, sys: 0 ns, total: 164 ms\n",
-      "Wall time: 167 ms\n"
+      "CPU times: user 154 ms, sys: 361 µs, total: 154 ms\n",
+      "Wall time: 169 ms\n"
      ]
     },
     {
      "data": {
-      "text/html": [
-       "<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>MultinomialNB()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">MultinomialNB</label><div class=\"sk-toggleable__content\"><pre>MultinomialNB()</pre></div></div></div></div></div>"
-      ],
       "text/plain": [
-       "MultinomialNB()"
+       "MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)"
       ]
      },
-     "execution_count": 17,
+     "execution_count": 29,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -842,16 +860,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "array([0, 0, 0, ..., 1, 1, 1])"
+       "array([0, 0, 0, ..., 0, 0, 1])"
       ]
      },
-     "execution_count": 18,
+     "execution_count": 30,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -864,7 +882,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [
     {
@@ -872,20 +890,26 @@
      "output_type": "stream",
      "text": [
       "在测试集上的混淆矩阵：\n",
-      "[[69594  1220]\n",
-      " [  136  7711]]\n",
-      "在测试集上的分类结果报告：\n",
+      "[[139190   2406]\n",
+      " [   297  15429]]\n",
+      "在测试集上的分类结果报告：\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
       "              precision    recall  f1-score   support\n",
       "\n",
-      "           0       1.00      0.98      0.99     70814\n",
-      "           1       0.86      0.98      0.92      7847\n",
+      "           0       1.00      0.98      0.99    141596\n",
+      "           1       0.87      0.98      0.92     15726\n",
       "\n",
-      "    accuracy                           0.98     78661\n",
-      "   macro avg       0.93      0.98      0.95     78661\n",
-      "weighted avg       0.98      0.98      0.98     78661\n",
+      "    accuracy                           0.98    157322\n",
+      "   macro avg       0.93      0.98      0.95    157322\n",
+      "weighted avg       0.98      0.98      0.98    157322\n",
       "\n",
       "在测试集上的 f1-score ：\n",
-      "0.9191798784122064\n"
+      "0.9194600876016805\n"
      ]
     }
    ],
@@ -916,7 +940,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -932,7 +956,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -943,7 +967,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [
     {
@@ -951,20 +975,20 @@
      "output_type": "stream",
      "text": [
       "在测试集上的混淆矩阵：\n",
-      "[[69594  1220]\n",
-      " [  136  7711]]\n",
+      "[[139190   2406]\n",
+      " [   297  15429]]\n",
       "在测试集上的分类结果报告：\n",
       "              precision    recall  f1-score   support\n",
       "\n",
-      "           0       1.00      0.98      0.99     70814\n",
-      "           1       0.86      0.98      0.92      7847\n",
+      "           0       1.00      0.98      0.99    141596\n",
+      "           1       0.87      0.98      0.92     15726\n",
       "\n",
-      "    accuracy                           0.98     78661\n",
-      "   macro avg       0.93      0.98      0.95     78661\n",
-      "weighted avg       0.98      0.98      0.98     78661\n",
+      "    accuracy                           0.98    157322\n",
+      "   macro avg       0.93      0.98      0.95    157322\n",
+      "weighted avg       0.98      0.98      0.98    157322\n",
       "\n",
       "在测试集上的 f1-score ：\n",
-      "0.9191798784122064\n"
+      "0.9194600876016805\n"
      ]
     }
    ],
@@ -1011,7 +1035,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 69,
    "metadata": {
     "deletable": false,
     "select": true
@@ -1028,6 +1052,7 @@
     "# stopwords_path = r'stopwords/cn_stopwords.txt'\n",
     "# stopwords_path = r'stopwords/hit_stopwords.txt'\n",
     "stopwords_path = r'stopwords/scu_stopwords.txt'\n",
+    "# stopwords_path = r'stopwords/stopWord.txt'\n",
     "\n",
     "# ---------------------------------------------------\n",
     "\n",
@@ -1054,7 +1079,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 101,
    "metadata": {
     "deletable": false,
     "select": true
@@ -1067,6 +1092,7 @@
     "from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler,  PowerTransformer\n",
     "\n",
     "from sklearn.naive_bayes import BernoulliNB, MultinomialNB, ComplementNB\n",
+    "from sklearn.linear_model import LogisticRegression\n",
     "\n",
     "# pipline_list用于传给Pipline作为参数\n",
     "pipeline_list = [\n",
@@ -1074,32 +1100,40 @@
     "    \n",
     "    # ========================== 以下代码仅供参考 =============================\n",
     "# Vectorizer\n",
-    "#     ('cv', CountVectorizer(token_pattern=r\"(?u)\\b\\w+\\b\", stop_words=stopwords)),\n",
+    "    # ('cv', CountVectorizer(token_pattern=r\"(?u)\\b\\w+\\b\", stop_words=stopwords)),\n",
+    "    # ('cv', CountVectorizer(ngram_range=(1,2), token_pattern=r\"(?u)\\b\\w+\\b\", stop_words=stopwords)),\n",
+    "    # ('cv', CountVectorizer(ngram_range=(1,3), max_features=5000, token_pattern=r\"(?u)\\b\\w+\\b\", stop_words=stopwords)),\n",
+    "    # ('cv', CountVectorizer(token_pattern=r\"(?u)\\b\\w+\\b\", stop_words=stopwords)),\n",
     "\t# ('hv',  HashingVectorizer(token_pattern=r\"(?u)\\b\\w+\\b\", stop_words=stopwords)), # ValueError: Negative values in data passed to MultinomialNB (input X)\n",
-    "\t('tv',  TfidfVectorizer(ngram_range=(1,3), token_pattern=r\"(?u)\\b\\w+\\b\", stop_words=stopwords)),\n",
+    "    ('tv',  TfidfVectorizer(ngram_range=(1,2), max_df=0.25, token_pattern=r\"(?u)\\b\\w+\\b\", stop_words=stopwords)),\n",
+    "\t# ('tv',  TfidfVectorizer(ngram_range=(1,3), token_pattern=r\"(?u)\\b\\w+\\b\", stop_words=stopwords)),\n",
+    "\t# ('tv',  TfidfVectorizer(token_pattern=r\"(?u)\\b\\w+\\b\", stop_words=stopwords)),\n",
     "\t\n",
     "# Scaler\n",
     "    # ('ss',StandardScaler(with_mean=False)),\n",
     "    # ('mms',MinMaxScaler()), # TypeError: MinMaxScaler does not support sparse input. Consider using MaxAbsScaler instead.\n",
-    "    ('mas',MaxAbsScaler()),\n",
+    "    ('mas', MaxAbsScaler()),\n",
     "    # ('rs',RobustScaler(with_centering=False)), # 运行效率极低，耗时20分钟仍然未出结果，遂放弃\n",
     "    # ('pt',PowerTransformer()), # TypeError: A sparse matrix was passed, but dense data is required. Use X.toarray() to convert to a dense numpy array.\n",
     "\n",
     "# Classifier\n",
     "    # ('classifier', BernoulliNB())\n",
     "    # ('classifier', MultinomialNB())\n",
-    "\t('classifier', MultinomialNB(alpha=1))\n",
-    "    # ('classifier', ComplementNB())\n",
-    "    # ('classifier', ComplementNB(alpha=1))\n",
+    "\t# ('classifier', MultinomialNB(alpha=0.99))\n",
+    "    ('classifier', ComplementNB(alpha=0.25))\n",
+    "    # ('classifier', ComplementNB(alpha=0.5))\n",
+    "    # ('classifier', ComplementNB(alpha=0.99))\n",
+    "\t# ('classifier', LogisticRegression())\n",
+    "\t\n",
     "    # ========================================================================\n",
     "    \n",
     "    # ------------------------------------------------------------------------\n",
-    "]"
+    "] "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 102,
    "metadata": {},
    "outputs": [
     {
@@ -1107,20 +1141,20 @@
      "output_type": "stream",
      "text": [
       "在测试集上的混淆矩阵：\n",
-      "[[70125   689]\n",
-      " [  170  7677]]\n",
+      "[[137768   3828]\n",
+      " [   140  15586]]\n",
       "在测试集上的分类结果报告：\n",
       "              precision    recall  f1-score   support\n",
       "\n",
-      "           0       1.00      0.99      0.99     70814\n",
-      "           1       0.92      0.98      0.95      7847\n",
+      "           0       1.00      0.97      0.99    141596\n",
+      "           1       0.80      0.99      0.89     15726\n",
       "\n",
-      "    accuracy                           0.99     78661\n",
-      "   macro avg       0.96      0.98      0.97     78661\n",
-      "weighted avg       0.99      0.99      0.99     78661\n",
+      "    accuracy                           0.97    157322\n",
+      "   macro avg       0.90      0.98      0.94    157322\n",
+      "weighted avg       0.98      0.97      0.98    157322\n",
       "\n",
       "在测试集上的 f1-score ：\n",
-      "0.9470178252019984\n"
+      "0.8870802504268639\n"
      ]
     }
    ],
@@ -1146,7 +1180,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 103,
    "metadata": {},
    "outputs": [
     {
@@ -1155,7 +1189,7 @@
        "['results/pipeline.model']"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 103,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1164,8 +1198,7 @@
     "# 在所有的样本上训练一次，充分利用已有的数据，提高模型的泛化能力\n",
     "pipeline.fit(X, y)\n",
     "# 保存训练的模型，请将模型保存在 results 目录下\n",
-    "# from sklearn.externals import joblib\n",
-    "import joblib\n",
+    "import joblib  # standalone package; the sklearn.externals.joblib shim is removed in scikit-learn 0.23+\n",
     "pipeline_path = 'results/pipeline.model'\n",
     "joblib.dump(pipeline, pipeline_path)"
    ]
@@ -1191,7 +1224,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 109,
+   "execution_count": 104,
    "metadata": {
     "deletable": false,
     "select": true
@@ -1199,8 +1232,7 @@
    "outputs": [],
    "source": [
     "# 加载训练好的模型\n",
-    "# from sklearn.externals import joblib\n",
-    "import joblib\n",
+    "import joblib  # standalone package; the sklearn.externals.joblib shim is removed in scikit-learn 0.23+\n",
     "# ------- pipeline 保存的路径，若有变化请修改 --------\n",
     "pipeline_path = 'results/pipeline.model'\n",
     "# --------------------------------------------------\n",
@@ -1221,14 +1253,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 110,
+   "execution_count": 105,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "0 [0.9944540509554506, 0.005545949044549219]\n"
+      "0 [0.999999993290345, 6.709663776094601e-09]\n"
      ]
     }
    ],
@@ -1237,13 +1269,6 @@
     "label, proba = predict('医生 拿 着 我 的 报告单 说 ： 幸亏 你 来 的 早 啊')\n",
     "print(label, proba)"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
@@ -1262,7 +1287,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.19"
+   "version": "3.7.5"
   },
   "nbTranslate": {
    "displayLangs": [
diff --git "a/Exp3_SMS_Spam_Recognition/picture/1.png\357\200\272Zone.Identifier" "b/Exp3_SMS_Spam_Recognition/picture/1.png\357\200\272Zone.Identifier"
deleted file mode 100644
index 4d578b4..0000000
--- "a/Exp3_SMS_Spam_Recognition/picture/1.png\357\200\272Zone.Identifier"
+++ /dev/null
@@ -1,4 +0,0 @@
-[ZoneTransfer]
-ZoneId=3
-ReferrerUrl=https://mo.zju.edu.cn/workspace/661c8f99f57cca0fee8a4177/app
-HostUrl=https://mo.zju.edu.cn/hub_api/user/O8JZoIbiqaRE8wlZ3eAWttxvFiDLYPn4b0QlBVW59uV7SMHhYoi4qrRPr5vFhTx9Pg%3D%3D/files/picture/1.png?token=7c3446d25118435daaa923014199f57c
diff --git "a/Exp3_SMS_Spam_Recognition/picture/2.png\357\200\272Zone.Identifier" "b/Exp3_SMS_Spam_Recognition/picture/2.png\357\200\272Zone.Identifier"
deleted file mode 100644
index 9b52090..0000000
--- "a/Exp3_SMS_Spam_Recognition/picture/2.png\357\200\272Zone.Identifier"
+++ /dev/null
@@ -1,4 +0,0 @@
-[ZoneTransfer]
-ZoneId=3
-ReferrerUrl=https://mo.zju.edu.cn/workspace/661c8f99f57cca0fee8a4177/app
-HostUrl=https://mo.zju.edu.cn/hub_api/user/O8JZoIbiqaRE8wlZ3eAWttxvFiDLYPn4b0QlBVW59uV7SMHhYoi4qrRPr5vFhTx9Pg%3D%3D/files/picture/2.png?token=7c3446d25118435daaa923014199f57c
diff --git "a/Exp3_SMS_Spam_Recognition/picture/3.png\357\200\272Zone.Identifier" "b/Exp3_SMS_Spam_Recognition/picture/3.png\357\200\272Zone.Identifier"
deleted file mode 100644
index 4ce4c7d..0000000
--- "a/Exp3_SMS_Spam_Recognition/picture/3.png\357\200\272Zone.Identifier"
+++ /dev/null
@@ -1,4 +0,0 @@
-[ZoneTransfer]
-ZoneId=3
-ReferrerUrl=https://mo.zju.edu.cn/workspace/661c8f99f57cca0fee8a4177/app
-HostUrl=https://mo.zju.edu.cn/hub_api/user/O8JZoIbiqaRE8wlZ3eAWttxvFiDLYPn4b0QlBVW59uV7SMHhYoi4qrRPr5vFhTx9Pg%3D%3D/files/picture/3.png?token=7c3446d25118435daaa923014199f57c
diff --git "a/Exp3_SMS_Spam_Recognition/picture/4.png\357\200\272Zone.Identifier" "b/Exp3_SMS_Spam_Recognition/picture/4.png\357\200\272Zone.Identifier"
deleted file mode 100644
index 0afba1f..0000000
--- "a/Exp3_SMS_Spam_Recognition/picture/4.png\357\200\272Zone.Identifier"
+++ /dev/null
@@ -1,4 +0,0 @@
-[ZoneTransfer]
-ZoneId=3
-ReferrerUrl=https://mo.zju.edu.cn/workspace/661c8f99f57cca0fee8a4177/app
-HostUrl=https://mo.zju.edu.cn/hub_api/user/O8JZoIbiqaRE8wlZ3eAWttxvFiDLYPn4b0QlBVW59uV7SMHhYoi4qrRPr5vFhTx9Pg%3D%3D/files/picture/4.png?token=7c3446d25118435daaa923014199f57c
diff --git a/Exp3_SMS_Spam_Recognition/requirements.txt b/Exp3_SMS_Spam_Recognition/requirements.txt
new file mode 100644
index 0000000..988d381
--- /dev/null
+++ b/Exp3_SMS_Spam_Recognition/requirements.txt
@@ -0,0 +1,32 @@
+backcall==0.2.0
+certifi
+debugpy==1.7.0
+decorator==5.1.1
+entrypoints==0.4
+ipykernel==6.16.2
+ipython==7.34.0
+jedi==0.19.1
+joblib==1.1.0
+jupyter_client==7.4.9
+jupyter_core==4.12.0
+matplotlib-inline==0.1.6
+nest-asyncio==1.6.0
+numpy==1.18.5
+packaging==24.0
+pandas==1.3.5
+parso==0.8.4
+pexpect==4.9.0
+pickleshare==0.7.5
+prompt-toolkit==3.0.43
+psutil==5.9.8
+ptyprocess==0.7.0
+Pygments==2.17.2
+python-dateutil==2.9.0.post0
+pytz==2022.1
+pyzmq==26.0.0
+scikit-learn==0.22.2.post1
+scipy==1.3.3
+six==1.16.0
+tornado==6.2
+traitlets==5.9.0
+wcwidth==0.2.13
diff --git a/Exp3_SMS_Spam_Recognition/results/README.md b/Exp3_SMS_Spam_Recognition/results/README.md
new file mode 100644
index 0000000..efbca6c
--- /dev/null
+++ b/Exp3_SMS_Spam_Recognition/results/README.md
@@ -0,0 +1 @@
+The trained model should be placed in this folder.
\ No newline at end of file