1
- // -----------------------
2
- // -- example-cramit.js --
3
- // --------------------------------------------------------------------------------
4
- // this is an example of how to use the cramit function
5
- // first we import the cramit function
6
- // then we set up the documents array with the text to chunk
7
- // then we call the cramit function with the text and an options object
8
- // the options object is optional
9
- //
10
- // the cramit function is faster than the chunkit function, but it is less accurate
11
- // useful for quickly chunking text, but not for exact semantic chunking
12
- // --------------------------------------------------------------------------------
13
-
14
- import { cramit } from '../chunkit.js' ; // this is typically just "import { cramit } from 'semantic-chunking';", but this is a local test
15
- import fs from 'fs' ;
16
-
17
- // initialize documents array
18
- let documents = [ ] ;
19
- let textFiles = [ './example3.txt' ] ;
20
-
21
- // read each text file and add it to the documents array
22
- for ( const textFile of textFiles ) {
23
- documents . push ( {
24
- document_name : textFile ,
25
- document_text : await fs . promises . readFile ( textFile , 'utf8' )
26
- } ) ;
27
- }
28
-
29
- // start timing
30
- const startTime = performance . now ( ) ;
31
-
32
- let myTestChunks = await cramit (
33
- documents ,
34
- {
35
- logging : false ,
36
- maxTokenSize : 300 ,
37
- onnxEmbeddingModel : "nomic-ai/nomic-embed-text-v1.5" ,
38
- onnxEmbeddingModelQuantized : true ,
39
- localModelPath : "../models" ,
40
- modelCacheDir : "../models" ,
41
- returnEmbedding : false ,
42
- returnTokenLength : true ,
43
- }
44
- ) ;
45
-
46
- // end timing
47
- const endTime = performance . now ( ) ;
48
-
49
- // calculate tracked time in seconds
50
- let trackedTimeSeconds = ( endTime - startTime ) / 1000 ;
51
- trackedTimeSeconds = parseFloat ( trackedTimeSeconds . toFixed ( 2 ) ) ;
52
-
53
- console . log ( "\n\n\n" ) ;
54
- console . log ( "myTestChunks:" ) ;
55
- console . log ( myTestChunks ) ;
56
- console . log ( "length: " + myTestChunks . length ) ;
1
+ // -----------------------
2
+ // -- example-cramit.js --
3
+ // --------------------------------------------------------------------------------
4
+ // this is an example of how to use the cramit function
5
+ // first we import the cramit function
6
+ // then we set up the documents array with the text to chunk
7
+ // then we call the cramit function with the text and an options object
8
+ // the options object is optional
9
+ //
10
+ // the cramit function is faster than the chunkit function, but it is less accurate
11
+ // useful for quickly chunking text, but not for exact semantic chunking
12
+ // --------------------------------------------------------------------------------
13
+
14
+ import { cramit } from '../chunkit.js' ; // this is typically just "import { cramit } from 'semantic-chunking';", but this is a local test
15
+ import fs from 'fs' ;
16
+ import { fileURLToPath } from 'url' ;
17
+ import { dirname , resolve } from 'path' ;
18
+
19
+ // Get current file's directory
20
+ const __filename = fileURLToPath ( import . meta. url ) ;
21
+ const __dirname = dirname ( __filename ) ;
22
+
23
+ // initialize documents array
24
+ let documents = [ ] ;
25
+ let textFiles = [ 'example3.txt' ] . map ( file =>
26
+ resolve ( __dirname , file )
27
+ ) ;
28
+
29
+ // read each text file and add it to the documents array
30
+ for ( const textFile of textFiles ) {
31
+ documents . push ( {
32
+ document_name : textFile ,
33
+ document_text : await fs . promises . readFile ( textFile , 'utf8' )
34
+ } ) ;
35
+ }
36
+
37
+ // start timing
38
+ const startTime = performance . now ( ) ;
39
+
40
+ let myTestChunks = await cramit (
41
+ documents ,
42
+ {
43
+ logging : false ,
44
+ maxTokenSize : 300 ,
45
+ onnxEmbeddingModel : "nomic-ai/nomic-embed-text-v1.5" ,
46
+ onnxEmbeddingModelQuantized : true ,
47
+ localModelPath : "../models" ,
48
+ modelCacheDir : "../models" ,
49
+ returnEmbedding : false ,
50
+ returnTokenLength : true ,
51
+ }
52
+ ) ;
53
+
54
+ // end timing
55
+ const endTime = performance . now ( ) ;
56
+
57
+ // calculate tracked time in seconds
58
+ let trackedTimeSeconds = ( endTime - startTime ) / 1000 ;
59
+ trackedTimeSeconds = parseFloat ( trackedTimeSeconds . toFixed ( 2 ) ) ;
60
+
61
+ console . log ( "\n\n\n" ) ;
62
+ console . log ( "myTestChunks:" ) ;
63
+ console . log ( myTestChunks ) ;
64
+ console . log ( "length: " + myTestChunks . length ) ;
57
65
console . log ( "trackedTimeSeconds: " + trackedTimeSeconds ) ;
0 commit comments