-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathembed.js
More file actions
73 lines (60 loc) · 1.59 KB
/
embed.js
File metadata and controls
73 lines (60 loc) · 1.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import { readFileSync, readdirSync } from "fs";
import ollama from "ollama";
import Database from "better-sqlite3";
import { v4 as uuidv4 } from "uuid";
// Session key attached to the metadata of every file embedded by this script.
const sessionKey = "testUser";

// Schema bootstrap: creates the `embeddings` table when it does not exist yet
// and switches the journal mode to WAL.
const schemaSql = `
CREATE TABLE IF NOT EXISTS embeddings (
id TEXT PRIMARY KEY,
sessid TEXT,
name TEXT,
content TEXT,
embeddings BLOB
);
PRAGMA journal_mode = WAL; -- Better write performance
`;

// Instantiate the database and apply the schema.
const db = new Database("embedblob.db");
db.exec(schemaSql);
// define functions
// INSERT statement for the `embeddings` table; prepared on first use and then
// reused, so the SQL is compiled once rather than on every call.
let insertEmbeddingStmt;

/**
 * Insert one embedded document into the `embeddings` table under a freshly
 * generated UUID.
 *
 * The function is `async` only so that callers keep receiving a Promise; the
 * insert itself runs synchronously, and any database error surfaces as a
 * rejection of the returned Promise.
 *
 * @param {Float32Array|Buffer} embeddings - Embedding vector. Typed-array
 *   views are stored as their raw bytes in the BLOB column.
 * @param {{session: string, name: string}} meta - `session` is written to the
 *   `sessid` column and `name` to the `name` column.
 * @param {string} content - Text the embedding was computed from.
 * @returns {Promise<string>} The id of the inserted row.
 */
async function saveToDb(embeddings, meta, content) {
  if (insertEmbeddingStmt === undefined) {
    // Columns are named explicitly so the insert does not depend on the
    // table's column order or column count.
    insertEmbeddingStmt = db.prepare(`
      INSERT INTO embeddings (id, sessid, name, content, embeddings)
      VALUES (?, ?, ?, ?, ?)
    `);
  }

  // Bind typed arrays as a Buffer over the same bytes (no copy), which is the
  // documented way to pass BLOB values to better-sqlite3.
  const blob = ArrayBuffer.isView(embeddings)
    ? Buffer.from(embeddings.buffer, embeddings.byteOffset, embeddings.byteLength)
    : embeddings;

  const id = uuidv4();
  // A single INSERT is already atomic, so no explicit transaction is needed.
  insertEmbeddingStmt.run(id, meta.session, meta.name, content, blob);
  return id;
}
/**
 * Compute an embedding for `content` with the "mxbai-embed-large" Ollama
 * model and persist it through `saveToDb`.
 *
 * The returned Promise settles only after the save has finished, so a failed
 * request or a failed save rejects it instead of being lost.
 *
 * @param {string} content - Text to embed.
 * @param {Object} meta - Metadata forwarded to `saveToDb`. As a side effect,
 *   `meta.model` is set to the model name reported in the Ollama response.
 * @returns {Promise<void>}
 */
async function Embed(content, meta) {
  const res = await ollama.embed({
    model: "mxbai-embed-large",
    // Per the Ollama embed API: truncate over-long input to the model's
    // context length rather than failing the request.
    truncate: true,
    input: content,
  });

  meta.model = res.model;

  // `res.embeddings` is a list of vectors; flattening it produces the single
  // numeric array that is stored as 32-bit floats.
  const vector = new Float32Array(res.embeddings.flat());

  // Awaited so that completion and errors of the save reach the caller.
  await saveToDb(vector, meta, content);
}
// Read the files in ./data and embed them.

/**
 * Embed every regular file directly inside `dir`, one after another.
 *
 * Files are processed sequentially so that each `Embed` call is awaited and
 * its errors are caught here. A file that fails is logged and counted; the
 * remaining files are still processed.
 *
 * @param {string} dir - Directory whose files are embedded.
 * @returns {Promise<number>} Number of files that could not be embedded.
 */
async function embedDirectory(dir) {
  let failures = 0;
  for (const entry of readdirSync(dir, { withFileTypes: true })) {
    // Sub-directories and other non-file entries cannot be read as text.
    if (!entry.isFile()) {
      continue;
    }
    try {
      const content = readFileSync(`${dir}/${entry.name}`, "utf-8");
      await Embed(content, { session: sessionKey, name: entry.name });
    } catch (err) {
      failures += 1;
      console.error(`Failed to embed ${entry.name}:`, err);
    }
  }
  return failures;
}

embedDirectory(`./data`)
  .then((failures) => {
    if (failures > 0) {
      process.exitCode = 1;
    }
  })
  .catch((err) => {
    // Reached when the directory itself cannot be listed.
    console.error("Embedding run failed:", err);
    process.exitCode = 1;
  })
  // All inserts have finished by now, so the connection can be closed.
  .finally(() => db.close());