Skip to content

Commit

Permalink
fix: Saving document chunks
Browse files Browse the repository at this point in the history
  • Loading branch information
booleangabs committed Jun 8, 2024
1 parent 3e47dc9 commit 6852a43
Show file tree
Hide file tree
Showing 3 changed files with 2,020 additions and 17,053 deletions.
60 changes: 35 additions & 25 deletions notebooks/1.0-jgpt-RAG-with-MLC-HNSW.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -273,12 +273,12 @@
"metadata": {},
"outputs": [],
"source": [
"!export HF_TOKEN=hf_REDACTED"
"!export HF_TOKEN=YOUR_TOKEN"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"id": "88b17a2e-d538-47e3-a399-c5e45fb913b6",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -333,17 +333,17 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 2,
"id": "81f44aa3-806c-456a-95ec-86bcbe0df436",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-06-03 11:14:36,330 - autollm - INFO - Reading files from data/..\n",
"Loading files: 100%|████████████████████████████| 1/1 [00:04<00:00, 4.46s/file]\n",
"2024-06-03 11:14:40,795 - autollm - INFO - Found 1 'document(s)'.\n"
"2024-06-08 10:32:11,492 - autollm - INFO - Reading files from data/..\n",
"Loading files: 100%|████████████████████████████| 1/1 [00:04<00:00, 4.54s/file]\n",
"2024-06-08 10:32:16,065 - autollm - INFO - Found 1 'document(s)'.\n"
]
}
],
Expand All @@ -354,7 +354,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 3,
"id": "be18b636-b2e9-49ae-b538-70d8c649c0b7",
"metadata": {},
"outputs": [],
Expand All @@ -369,15 +369,15 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 4,
"id": "5f12d457-89c4-4718-be35-3630039eb28b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"4.33 ms ± 91.5 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
"4.46 ms ± 130 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
Expand All @@ -388,7 +388,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 5,
"id": "6fb52ca1-e28d-413c-a0ca-f90686f2e0ef",
"metadata": {},
"outputs": [
Expand All @@ -398,7 +398,7 @@
"'User Guide\\n\\n\\x0c© 2022 Motorola Mobility LLC. All rights reserved.'"
]
},
"execution_count": 14,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -410,7 +410,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 6,
"id": "1f68034f-fa2e-4282-bf17-2883bdf4c22e",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -468,7 +468,7 @@
")"
]
},
"execution_count": 15,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -481,7 +481,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 7,
"id": "cd8e8673-8c63-43bc-9193-efdeb2fb2bfd",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -515,7 +515,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 8,
"id": "575735ed-20c9-4a7a-a67d-f8b14fbe7eff",
"metadata": {},
"outputs": [
Expand All @@ -525,7 +525,7 @@
"torch.Size([4, 384])"
]
},
"execution_count": 17,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -536,7 +536,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 9,
"id": "2fbcd6eb-b525-48c4-b1df-3c8953fc1ab2",
"metadata": {},
"outputs": [
Expand All @@ -546,7 +546,7 @@
"993"
]
},
"execution_count": 18,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -557,7 +557,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 10,
"id": "8ce21b38-4c95-4515-805b-643d9fa7fade",
"metadata": {},
"outputs": [],
Expand All @@ -573,7 +573,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 11,
"id": "d7ec879e-0935-49e5-bafb-dbc54ddfd543",
"metadata": {},
"outputs": [],
Expand All @@ -583,7 +583,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 12,
"id": "f4e13a15-6665-4f30-b4f8-7f51652ecc85",
"metadata": {},
"outputs": [
Expand All @@ -593,7 +593,7 @@
"True"
]
},
"execution_count": 23,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -1088,7 +1088,7 @@
},
{
"cell_type": "code",
"execution_count": 55,
"execution_count": 13,
"id": "7af96c8f-3e17-41e6-b735-6135a8081285",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -1505,12 +1505,22 @@
},
{
"cell_type": "code",
"execution_count": 130,
"execution_count": 15,
"id": "395e1b45-d16b-4ff9-ac07-c7a98074f086",
"metadata": {},
"outputs": [],
"source": [
"pd.Series(texts, name=\"chunks\").map(lambda x: x.replace(\"\\n\", \"\\\\n\").replace(\"\\t\", \"\")).to_csv(\"data/documents1.csv\", index=False, sep=\"\\t\")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "ad4aeb2a-2c0a-45be-9d79-b7a22935fea2",
"metadata": {},
"outputs": [],
"source": [
"pd.Series(texts, name=\"chunks\").to_csv(\"data/documents.csv\", index=False)"
"pd.Series(texts, name=\"chunks\").to_csv(\"data/documents1.csv\", index=False, sep=\"|\")"
]
},
{
Expand Down
Loading

0 comments on commit 6852a43

Please sign in to comment.