From 555c4d59bd0284a1e33170cce8f06b2343e6aef1 Mon Sep 17 00:00:00 2001
From: DoodleBears <yangmufeng233@gmail.com>
Date: Mon, 1 Jul 2024 02:57:57 +0900
Subject: [PATCH] fix: add Jupyter demo notebook split-lang-demo.ipynb

---
 split-lang-demo.ipynb | 86 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 split-lang-demo.ipynb

diff --git a/split-lang-demo.ipynb b/split-lang-demo.ipynb
new file mode 100644
index 0000000..678c0b1
--- /dev/null
+++ b/split-lang-demo.ipynb
@@ -0,0 +1,86 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\admin\\.conda\\envs\\melotts\\lib\\site-packages\\wtpsplit\\__init__.py:48: DeprecationWarning: You are using WtP, the old sentence segmentation model. It is highly encouraged to use SaT instead due to strongly improved performance and efficiency. See https://github.com/segment-any-text/wtpsplit for more info. To ignore this warning, set ignore_legacy_warning=True.\n",
+      "  warnings.warn(\n",
+      "c:\\Users\\admin\\.conda\\envs\\melotts\\lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
+      "  warnings.warn(\n",
+      "2024-07-01 02:55:01,151  DEBUG [urllib3.connectionpool]: Starting new HTTPS connection (1): huggingface.co:443\n",
+      "2024-07-01 02:55:01,760  DEBUG [urllib3.connectionpool]: https://huggingface.co:443 \"HEAD /benjamin/wtp-bert-mini/resolve/main/mixtures.skops HTTP/11\" 302 0\n",
+      "2024-07-01 02:55:02,348  DEBUG [urllib3.connectionpool]: https://huggingface.co:443 \"HEAD /benjamin/wtp-bert-mini/resolve/main/config.json HTTP/11\" 200 0\n",
+      "c:\\Users\\admin\\.conda\\envs\\melotts\\lib\\site-packages\\sklearn\\base.py:376: InconsistentVersionWarning: Trying to unpickle estimator LogisticRegression from version 1.2.2 when using version 1.5.0. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n",
+      "https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "import langsplit"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0|zh:你喜欢看\n",
+      "1|ja:アニメ\n",
+      "2|zh:吗\n",
+      "3|punctuation:？\n",
+      "----------------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langsplit import split_by_lang\n",
+    "\n",
+    "texts = [\n",
+    "    \"你喜欢看アニメ吗？\",\n",
+    "]\n",
+    "\n",
+    "for text in texts:\n",
+    "    substr = split_by_lang(\n",
+    "        text=text,\n",
+    "        threshold=4.9e-5,\n",
+    "        default_lang=\"en\",\n",
+    "    )\n",
+    "    for index, item in enumerate(substr):\n",
+    "        print(f\"{index}|{item.lang}:{item.text}\")\n",
+    "    print(\"----------------------\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "melotts",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}