Skip to content

Commit

Permalink
group list entries and reorganize options
Browse files Browse the repository at this point in the history
  • Loading branch information
ubaldus committed Jan 9, 2025
1 parent ffdff15 commit 2c01286
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 131 deletions.
6 changes: 3 additions & 3 deletions ai.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (C) 2024 by Ubaldo Porcheddu <ubaldo@eja.it>
// Copyright (C) 2024-2025 by Ubaldo Porcheddu <ubaldo@eja.it>

package main

Expand All @@ -17,7 +17,7 @@ import (
func aiInstruct(input string) (output string, err error) {
client := openai.NewClient(
option.WithAPIKey(options.aiApiKey),
option.WithBaseURL(options.aiUrl),
option.WithBaseURL(options.aiApiUrl),
)
chatCompletion, err := client.Chat.Completions.New(context.TODO(), openai.ChatCompletionNewParams{
Messages: openai.F([]openai.ChatCompletionMessageParamUnion{
Expand All @@ -34,7 +34,7 @@ func aiInstruct(input string) (output string, err error) {
func aiEmbeddings(input string) (output []float32, err error) {
client := openai.NewClient(
option.WithAPIKey(options.aiApiKey),
option.WithBaseURL(options.aiUrl),
option.WithBaseURL(options.aiApiUrl),
)
response, err := client.Embeddings.New(context.TODO(), openai.EmbeddingNewParams{
Model: openai.F(options.aiEmbeddingModel),
Expand Down
2 changes: 1 addition & 1 deletion assets/static/tts.html
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
<body>
<div class="container mt-4">
<h1 id="articleTitle" class="text-center mb-4"></h1>
<div id="articleContent" class="mb-4"></div>
<div id="articleContent" class="mb-4" style="white-space: pre-line;"></div>
</div>
<div class="control-buttons">
<div class="button-row">
Expand Down
2 changes: 1 addition & 1 deletion assets/templates/article.html
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ <h1 class="mb-5 text-center">{{$firstResult.Title}}</h1>
{{range $firstResult.Sections}}
<h2 class="mt-4 mb-3">{{.Title}}</h2>
{{range .Texts}}
<p class="mb-3">{{.}}</p>
<p class="mb-3" style="white-space: pre-line;">{{.}}</p>
{{end}}
{{end}}

Expand Down
62 changes: 18 additions & 44 deletions db.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,24 @@ func (h *DBHandler) initializeDB() error {
text TEXT NOT NULL,
pow INTEGER DEFAULT 0
)`,

`CREATE VIRTUAL TABLE IF NOT EXISTS article_search USING fts5(
title,
content='articles',
content_rowid='id'
)`,

`CREATE VIRTUAL TABLE IF NOT EXISTS hash_search USING fts5(
text,
content='hashes',
content_rowid='id'
)`,

`CREATE TABLE IF NOT EXISTS vectors (
id INTEGER PRIMARY KEY,
embedding BLOB
)`,

`CREATE TABLE IF NOT EXISTS vectors_ann (
id INTEGER PRIMARY KEY,
embedding BLOB
Expand Down Expand Up @@ -583,47 +597,17 @@ func (h *DBHandler) PragmaImportMode() error {
return h.Pragma(pragmas)
}

func (h *DBHandler) RebuildArticleSearch() error {
_, err := h.db.Exec("DROP TABLE IF EXISTS article_search")
if err != nil {
return fmt.Errorf("error dropping article_search table: %v", err)
}

_, err = h.db.Exec(`
CREATE VIRTUAL TABLE article_search USING fts5(
title,
content='articles',
content_rowid='id'
)`)
if err != nil {
return fmt.Errorf("error recreating article_search table: %v", err)
}

_, err = h.db.Exec("INSERT INTO article_search(rowid, title) SELECT id, title FROM articles")
// ProcessTitles regenerates the article_search full-text index from the
// articles table.
//
// article_search is declared as an external-content FTS5 table
// (content='articles', content_rowid='id'). Appending rows with a plain
// INSERT ... SELECT would index every article again each time the sync runs,
// so the FTS5 'rebuild' command is used instead: it discards the current
// index and rebuilds it from the content table, which makes this method safe
// to call repeatedly.
//
// It returns a wrapped error if the rebuild statement fails.
func (h *DBHandler) ProcessTitles() error {
	if _, err := h.db.Exec("INSERT INTO article_search(article_search) VALUES('rebuild')"); err != nil {
		return fmt.Errorf("error populating article_search table: %w", err)
	}

	return nil
}

func (h *DBHandler) RebuildHashSearch() error {
_, err := h.db.Exec("DROP TABLE IF EXISTS hash_search")
if err != nil {
return fmt.Errorf("error dropping hash_search table: %v", err)
}

_, err = h.db.Exec(`
CREATE VIRTUAL TABLE hash_search USING fts5(
text,
content='hashes',
content_rowid='id'
)`)
if err != nil {
return fmt.Errorf("error recreating hash_search table: %v", err)
}

_, err = h.db.Exec("INSERT INTO hash_search(rowid, text) SELECT id, text FROM hashes")
func (h *DBHandler) ProcessContents() error {
_, err := h.db.Exec("INSERT INTO hash_search(rowid, text) SELECT id, text FROM hashes")
if err != nil {
return fmt.Errorf("error populating hash_search table: %v", err)
}
Expand Down Expand Up @@ -751,16 +735,6 @@ func (h *DBHandler) Optimize() error {
return fmt.Errorf("error committing transaction: %v", err)
}

log.Println("Populating hash_search table")
if err := h.RebuildHashSearch(); err != nil {
return err
}

log.Println("Populating article_search table")
if err := h.RebuildArticleSearch(); err != nil {
return err
}

log.Println("Running VACUUM")
_, err = h.db.Exec("VACUUM")
if err != nil {
Expand Down
81 changes: 50 additions & 31 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,29 +10,29 @@ import (
"os"
)

const Version = "0.1.3"
const Version = "0.1.4"

type Config struct {
importPath string //https://dumps.wikimedia.org/other/enterprise_html/runs/...
dbPath string
web bool
webHost string
webPort int
webTlsPrivate string
webTlsPublic string
ai bool
aiApiKey string
aiApiUrl string
aiEmbeddingModel string
aiEmbeddingSize int
aiEmbeddingSync bool
aiLlmModel string
aiUrl string
log bool
logFile string
cli bool
limit int
dbOptimize bool
dbPath string
dbSyncEmbeddings bool
dbSyncFTS bool
language string
optimize bool
limit int
log bool
logFile string
web bool
webHost string
webPort int
webTlsPrivate string
webTlsPublic string
wikiImport string //https://dumps.wikimedia.org/other/enterprise_html/runs/...
}

var (
Expand All @@ -43,25 +43,30 @@ var (
func parseConfig() (*Config, error) {
options = &Config{}
flag.BoolVar(&options.ai, "ai", false, "Enable AI")
flag.IntVar(&options.aiEmbeddingSize, "ai-embedding-size", 384, "AI embedding size")
flag.StringVar(&options.aiApiKey, "ai-api-key", "", "AI API key")
flag.StringVar(&options.aiApiUrl, "ai-api-url", "http://localhost:11434/v1/", "AI API base url")
flag.StringVar(&options.aiEmbeddingModel, "ai-embedding-model", "all-minilm", "AI embedding model")
flag.BoolVar(&options.aiEmbeddingSync, "ai-embedding-sync", false, "AI embedding sync")
flag.StringVar(&options.aiLlmModel, "ai-llm-model", "gemma2", "AI LLM model")
flag.StringVar(&options.aiUrl, "ai-url", "http://localhost:11434/v1/", "AI base url")
flag.StringVar(&options.aiApiKey, "ai-api-key", "", "AI API key")

flag.BoolVar(&options.cli, "cli", false, "Interactive search")

flag.StringVar(&options.dbPath, "db", "wikilite.db", "SQLite database path")
flag.StringVar(&options.importPath, "import", "", "URL or file path to import")
flag.BoolVar(&options.dbOptimize, "db-optimize", false, "Optimize database")
flag.BoolVar(&options.dbSyncEmbeddings, "db-sync-embeddings", false, "Sync database embeddings")
flag.BoolVar(&options.dbSyncFTS, "db-sync-fts", false, "Sync database full text search")

flag.StringVar(&options.language, "language", "en", "Language")
flag.IntVar(&options.limit, "limit", 5, "Maximum number of search results")
flag.BoolVar(&options.log, "log", false, "Enable logging")
flag.StringVar(&options.logFile, "log-file", "", "Log file path")

flag.BoolVar(&options.web, "web", false, "Enable web interface")
flag.StringVar(&options.webHost, "web-host", "localhost", "Web server host")
flag.IntVar(&options.webPort, "web-port", 35248, "Web server port")
flag.StringVar(&options.webTlsPrivate, "web-tls-private", "", "TLS private certificate")
flag.StringVar(&options.webTlsPublic, "web-tls-public", "", "TLS public certificate")
flag.BoolVar(&options.log, "log", false, "Enable logging")
flag.StringVar(&options.logFile, "log-file", "", "Log file path")
flag.BoolVar(&options.cli, "cli", false, "Interactive search")
flag.IntVar(&options.limit, "limit", 5, "Maximum number of search results")
flag.StringVar(&options.language, "language", "en", "Language")
flag.BoolVar(&options.optimize, "optimize", false, "Optimize database")

flag.StringVar(&options.wikiImport, "wiki-import", "", "URL or file path for wikipedia import")

flag.Usage = func() {
fmt.Println("Copyright:", "2024-2025 by Ubaldo Porcheddu <ubaldo@eja.it>")
Expand Down Expand Up @@ -106,25 +111,39 @@ func main() {
}
defer db.Close()

if options.optimize || options.importPath != "" || options.aiEmbeddingSync {
if options.dbOptimize || options.dbSyncFTS || options.dbSyncEmbeddings || options.wikiImport != "" {
if err := db.PragmaImportMode(); err != nil {
log.Fatalf("Error setting database in import mode: %v\n", err)
}
if options.importPath != "" {
if err = WikiImport(options.importPath); err != nil {
if options.wikiImport != "" {
if err = WikiImport(options.wikiImport); err != nil {
log.Fatalf("Error processing import: %v\n", err)
}
options.dbOptimize = true
options.dbSyncFTS = true
}
if options.optimize || options.importPath != "" {

if options.dbOptimize {
if err := db.Optimize(); err != nil {
log.Fatalf("Error during database optimization: %v\n", err)
}
}
if options.aiEmbeddingSync {

if options.dbSyncFTS {
if err := db.ProcessTitles(); err != nil {
log.Fatalf("Error processing FTS titles: %v\n", err)
}
if err := db.ProcessContents(); err != nil {
log.Fatalf("Error processing FTS contents: %v\n", err)
}
}

if options.dbSyncEmbeddings {
if err := db.ProcessEmbeddings(); err != nil {
log.Fatalf("Error processing embeddings: %v\n", err)
}
}

if err := db.PragmaReadMode(); err != nil {
log.Fatalf("Error setting database in read mode: %v\n", err)
}
Expand Down
Loading

0 comments on commit 2c01286

Please sign in to comment.