Commit f9f205b

update path resolution for example .txt files

1 parent 59e15c0 commit f9f205b

File tree

3 files changed: +135 -111 lines changed

example/example-chunkit.js

Lines changed: 9 additions & 1 deletion

@@ -10,10 +10,18 @@
 
 import { chunkit } from '../chunkit.js'; // this is typically just "import { chunkit } from 'semantic-chunking';", but this is a local test
 import fs from 'fs';
+import { fileURLToPath } from 'url';
+import { dirname, resolve } from 'path';
+
+// Get current file's directory
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
 
 // initialize documents array
 let documents = [];
-let textFiles = ['./example.txt', './different.txt', './similar.txt'];
+let textFiles = ['example.txt', 'different.txt', 'similar.txt'].map(file =>
+    resolve(__dirname, file)
+);
 
 // read each text file and add it to the documents array
 for (const textFile of textFiles) {
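Why the change matters: a cwd-relative path like './example.txt' is resolved against process.cwd(), so the example scripts only worked when launched from inside example/. Resolving against the script's own directory removes that dependency. Below is a minimal standalone sketch of the same ESM pattern (ES modules have no built-in __dirname, so it is derived from import.meta.url; the file name example.txt is just a placeholder here):

// sketch: resolve a data file relative to the script, not the working directory
import { fileURLToPath } from 'url';
import { dirname, resolve } from 'path';

const __filename = fileURLToPath(import.meta.url); // absolute path of this script
const __dirname = dirname(__filename);             // directory containing this script

// always <script dir>/example.txt, no matter where `node` was launched from
const dataPath = resolve(__dirname, 'example.txt');
console.log(dataPath);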

example/example-cramit.js

Lines changed: 64 additions & 56 deletions

@@ -1,57 +1,65 @@
-// -----------------------
-// -- example-cramit.js --
-// --------------------------------------------------------------------------------
-// this is an example of how to use the cramit function
-// first we import the cramit function
-// then we setup the documents array with a text
-// then we call the cramit function with the text and an options object
-// the options object is optional
-//
-// the cramit function is faster than the chunkit function, but it is less accurate
-// useful for quickly chunking text, but not for exact semantic chunking
-// --------------------------------------------------------------------------------
-
-import { cramit } from '../chunkit.js'; // this is typically just "import { cramit } from 'semantic-chunking';", but this is a local test
-import fs from 'fs';
-
-// initialize documents array
-let documents = [];
-let textFiles = ['./example3.txt'];
-
-// read each text file and add it to the documents array
-for (const textFile of textFiles) {
-    documents.push({
-        document_name: textFile,
-        document_text: await fs.promises.readFile(textFile, 'utf8')
-    });
-}
-
-// start timing
-const startTime = performance.now();
-
-let myTestChunks = await cramit(
-    documents,
-    {
-        logging: false,
-        maxTokenSize: 300,
-        onnxEmbeddingModel: "nomic-ai/nomic-embed-text-v1.5",
-        onnxEmbeddingModelQuantized: true,
-        localModelPath: "../models",
-        modelCacheDir: "../models",
-        returnEmbedding: false,
-        returnTokenLength: true,
-    }
-);
-
-// end timing
-const endTime = performance.now();
-
-// calculate tracked time in seconds
-let trackedTimeSeconds = (endTime - startTime) / 1000;
-trackedTimeSeconds = parseFloat(trackedTimeSeconds.toFixed(2));
-
-console.log("\n\n\n");
-console.log("myTestChunks:");
-console.log(myTestChunks);
-console.log("length: " + myTestChunks.length);
+// -----------------------
+// -- example-cramit.js --
+// --------------------------------------------------------------------------------
+// this is an example of how to use the cramit function
+// first we import the cramit function
+// then we setup the documents array with a text
+// then we call the cramit function with the text and an options object
+// the options object is optional
+//
+// the cramit function is faster than the chunkit function, but it is less accurate
+// useful for quickly chunking text, but not for exact semantic chunking
+// --------------------------------------------------------------------------------
+
+import { cramit } from '../chunkit.js'; // this is typically just "import { cramit } from 'semantic-chunking';", but this is a local test
+import fs from 'fs';
+import { fileURLToPath } from 'url';
+import { dirname, resolve } from 'path';
+
+// Get current file's directory
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+
+// initialize documents array
+let documents = [];
+let textFiles = ['example3.txt'].map(file =>
+    resolve(__dirname, file)
+);
+
+// read each text file and add it to the documents array
+for (const textFile of textFiles) {
+    documents.push({
+        document_name: textFile,
+        document_text: await fs.promises.readFile(textFile, 'utf8')
+    });
+}
+
+// start timing
+const startTime = performance.now();
+
+let myTestChunks = await cramit(
+    documents,
+    {
+        logging: false,
+        maxTokenSize: 300,
+        onnxEmbeddingModel: "nomic-ai/nomic-embed-text-v1.5",
+        onnxEmbeddingModelQuantized: true,
+        localModelPath: "../models",
+        modelCacheDir: "../models",
+        returnEmbedding: false,
+        returnTokenLength: true,
+    }
+);
+
+// end timing
+const endTime = performance.now();
+
+// calculate tracked time in seconds
+let trackedTimeSeconds = (endTime - startTime) / 1000;
+trackedTimeSeconds = parseFloat(trackedTimeSeconds.toFixed(2));
+
+console.log("\n\n\n");
+console.log("myTestChunks:");
+console.log(myTestChunks);
+console.log("length: " + myTestChunks.length);
 console.log("trackedTimeSeconds: " + trackedTimeSeconds);

example/example-sentenceit.js

Lines changed: 62 additions & 54 deletions

@@ -1,55 +1,63 @@
-// -----------------------
-// -- example-sentenceit.js --
-// --------------------------------------------------------------------------------
-// this is an example of how to use the sentenceit function
-// first we import the sentenceit function
-// then we setup the documents array with a text
-// then we call the sentenceit function with the text and an options object
-// the options object is optional
-//
-// the sentenceit function is faster than the chunkit function, but it is less accurate
-// useful for quickly chunking text, but not for exact semantic chunking
-// --------------------------------------------------------------------------------
-
-import { sentenceit } from '../chunkit.js'; // this is typically just "import { sentenceit } from 'semantic-chunking';", but this is a local test
-import fs from 'fs';
-
-// initialize documents array
-let documents = [];
-let textFiles = ['./example3.txt'];
-
-// read each text file and add it to the documents array
-for (const textFile of textFiles) {
-    documents.push({
-        document_name: textFile,
-        document_text: await fs.promises.readFile(textFile, 'utf8')
-    });
-}
-
-// start timing
-const startTime = performance.now();
-
-let myTestSentences = await sentenceit(
-    documents,
-    {
-        logging: false,
-        onnxEmbeddingModel: "Xenova/all-MiniLM-L6-v2",
-        dtype: 'fp32',
-        localModelPath: "../models",
-        modelCacheDir: "../models",
-        returnEmbedding: true,
-    }
-);
-
-// end timing
-const endTime = performance.now();
-
-// calculate tracked time in seconds
-let trackedTimeSeconds = (endTime - startTime) / 1000;
-trackedTimeSeconds = parseFloat(trackedTimeSeconds.toFixed(2));
-
-console.log("\n\n\n");
-console.log("myTestSentences:");
-console.log(myTestSentences);
-console.log("length: " + myTestSentences.length);
+// -----------------------
+// -- example-sentenceit.js --
+// --------------------------------------------------------------------------------
+// this is an example of how to use the sentenceit function
+// first we import the sentenceit function
+// then we setup the documents array with a text
+// then we call the sentenceit function with the text and an options object
+// the options object is optional
+//
+// the sentenceit function is faster than the chunkit function, but it is less accurate
+// useful for quickly chunking text, but not for exact semantic chunking
+// --------------------------------------------------------------------------------
+
+import { sentenceit } from '../chunkit.js'; // this is typically just "import { sentenceit } from 'semantic-chunking';", but this is a local test
+import fs from 'fs';
+import { fileURLToPath } from 'url';
+import { dirname, resolve } from 'path';
+
+// Get current file's directory
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+
+// initialize documents array
+let documents = [];
+let textFiles = ['example3.txt'].map(file =>
+    resolve(__dirname, file)
+);
+
+// read each text file and add it to the documents array
+for (const textFile of textFiles) {
+    documents.push({
+        document_name: textFile,
+        document_text: await fs.promises.readFile(textFile, 'utf8')
+    });
+}
+
+// start timing
+const startTime = performance.now();
+
+let myTestSentences = await sentenceit(
+    documents,
+    {
+        logging: false,
+        onnxEmbeddingModel: "Xenova/all-MiniLM-L6-v2",
+        dtype: 'fp32',
+        localModelPath: "../models",
+        modelCacheDir: "../models",
+        returnEmbedding: true,
+    }
+);
+
+// end timing
+const endTime = performance.now();
+
+// calculate tracked time in seconds
+let trackedTimeSeconds = (endTime - startTime) / 1000;
+trackedTimeSeconds = parseFloat(trackedTimeSeconds.toFixed(2));
+
+console.log("\n\n\n");
+console.log("myTestSentences:");
+console.log(myTestSentences);
+console.log("length: " + myTestSentences.length);
 console.log("trackedTimeSeconds: " + trackedTimeSeconds);
