Skip to content

Commit

Permalink
allow downloading database and gguf from huggingface
Browse files Browse the repository at this point in the history
  • Loading branch information
ubaldus committed Jan 12, 2025
1 parent 5812738 commit 693c6bf
Show file tree
Hide file tree
Showing 3 changed files with 169 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ wikilite
wikilite.exe
wikilite.db*
dist/
*.gguf
8 changes: 7 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
"os"
)

const Version = "0.3.0"
const Version = "0.4.0"

type Config struct {
aiApiKey string
Expand All @@ -23,6 +23,7 @@ type Config struct {
limit int
log bool
logFile string
setup bool
web bool
webHost string
webPort int
Expand Down Expand Up @@ -52,6 +53,7 @@ func parseConfig() (*Config, error) {
flag.IntVar(&options.limit, "limit", 5, "Maximum number of search results")
flag.BoolVar(&options.log, "log", false, "Enable logging")
flag.StringVar(&options.logFile, "log-file", "", "Log file path")
flag.BoolVar(&options.setup, "setup", false, "Download a ready made database and embeddings model")

flag.BoolVar(&options.web, "web", false, "Enable web interface")
flag.StringVar(&options.webHost, "web-host", "localhost", "Web server host")
Expand Down Expand Up @@ -86,6 +88,10 @@ func main() {
os.Exit(0)
}

if options.setup {
Setup()
}

if options.log || options.logFile != "" {
if options.logFile != "" {
logFile, err := os.OpenFile(options.logFile, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
Expand Down
161 changes: 161 additions & 0 deletions setup.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
// Copyright (C) 2025 by Ubaldo Porcheddu <ubaldo@eja.it>

package main

import (
"bufio"
"compress/gzip"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"strings"
)

// SetupSibling is one element of the "siblings" array in the dataset-info
// JSON document. Only the file name is decoded; any other keys are ignored.
type SetupSibling struct {
// Rfilename is the file name reported under the "rfilename" key.
Rfilename string `json:"rfilename"`
}

// SetupDatasetInfo is the subset of the dataset-info JSON document that the
// setup code decodes: just the list of files under the "siblings" key.
type SetupDatasetInfo struct {
// Siblings holds one entry per file listed under the "siblings" key.
Siblings []SetupSibling `json:"siblings"`
}

// setupDownloadFile fetches url with an HTTP GET and streams the response body
// into outputPath, creating the file or truncating an existing one.
//
// Any response other than 200 OK is returned as an error before the output
// file is created, so an HTTP error page is never saved under the target
// name. If copying the body or closing the file fails, the partially written
// file is removed and the error is returned.
func setupDownloadFile(url string, outputPath string) error {
	resp, err := http.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("downloading %s: unexpected status %s", url, resp.Status)
	}

	out, err := os.Create(outputPath)
	if err != nil {
		return err
	}

	if _, err := io.Copy(out, resp.Body); err != nil {
		// Do not leave a truncated download behind: a later run would
		// mistake it for a complete file.
		out.Close()
		os.Remove(outputPath)
		return err
	}
	// Close can report a deferred write error, so it must be checked.
	if err := out.Close(); err != nil {
		os.Remove(outputPath)
		return err
	}
	return nil
}

// setupGunzipFile fetches a gzip-compressed resource from url with an HTTP GET
// and writes the decompressed stream to outputPath, creating the file or
// truncating an existing one.
//
// Any response other than 200 OK, or a body that does not start with a valid
// gzip header, is returned as an error before the output file is created. If
// decompression fails part-way (truncated or corrupt stream) or the file
// cannot be closed, the partially written file is removed so that a later
// existence check does not mistake it for a complete download.
func setupGunzipFile(url string, outputPath string) error {
	resp, err := http.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("downloading %s: unexpected status %s", url, resp.Status)
	}

	gzReader, err := gzip.NewReader(resp.Body)
	if err != nil {
		return err
	}
	defer gzReader.Close()

	out, err := os.Create(outputPath)
	if err != nil {
		return err
	}

	if _, err := io.Copy(out, gzReader); err != nil {
		out.Close()
		os.Remove(outputPath)
		return err
	}
	// Close can report a deferred write error, so it must be checked.
	if err := out.Close(); err != nil {
		os.Remove(outputPath)
		return err
	}
	return nil
}

// setupFetchDatasetInfo performs an HTTP GET on url and decodes the JSON
// response body into a SetupDatasetInfo.
//
// It returns an error if the request fails, if the server answers with
// anything other than 200 OK, or if the body is not valid JSON. Without the
// status check, a JSON-formatted error response would decode into an empty
// struct and be indistinguishable from a dataset with no files.
func setupFetchDatasetInfo(url string) (*SetupDatasetInfo, error) {
	resp, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("fetching dataset info from %s: unexpected status %s", url, resp.Status)
	}

	var datasetInfo SetupDatasetInfo
	if err := json.NewDecoder(resp.Body).Decode(&datasetInfo); err != nil {
		return nil, fmt.Errorf("decoding dataset info: %w", err)
	}
	return &datasetInfo, nil
}

// setupFilterAndDisplayFiles returns the entries of siblings whose file name
// ends in ".db.gz", in their original order. As each match is found it is
// printed to standard output as a numbered menu line, numbered from 1 to match
// its position in the returned slice. The result is nil when nothing matches.
func setupFilterAndDisplayFiles(siblings []SetupSibling) []SetupSibling {
	var matches []SetupSibling
	for i := range siblings {
		entry := siblings[i]
		if !strings.HasSuffix(entry.Rfilename, ".db.gz") {
			continue
		}
		matches = append(matches, entry)
		fmt.Printf("%d. %s\n", len(matches), entry.Rfilename)
	}
	return matches
}

// setupGetGGUFFileName derives a ".gguf" file name from a database archive
// name. After the ".db.gz" suffix is stripped, the remainder must contain
// exactly one dot; the text after that dot plus ".gguf" is returned. Any other
// shape (no dot, or more than one) yields the empty string.
func setupGetGGUFFileName(dbFile string) string {
	stem := strings.TrimSuffix(dbFile, ".db.gz")
	// Exactly one dot means the stem splits into exactly two pieces.
	if strings.Count(stem, ".") != 1 {
		return ""
	}
	dot := strings.Index(stem, ".")
	return stem[dot+1:] + ".gguf"
}

// setupFileExists reports whether filename appears to exist. It returns false
// only when os.Stat fails with a "does not exist" error; any other outcome,
// including other stat errors, is reported as true.
func setupFileExists(filename string) bool {
	if _, statErr := os.Stat(filename); os.IsNotExist(statErr) {
		return false
	}
	return true
}

func Setup() {
url := "https://huggingface.co/api/datasets/eja/wikilite"

datasetInfo, err := setupFetchDatasetInfo(url)
if err != nil {
fmt.Println("Error fetching dataset info:", err)
return
}

dbFiles := setupFilterAndDisplayFiles(datasetInfo.Siblings)
if len(dbFiles) == 0 {
fmt.Println("No .db.gz files found.")
return
}

fmt.Print("Choose a file by number: ")
reader := bufio.NewReader(os.Stdin)
input, _ := reader.ReadString('\n')
var choice int
_, err = fmt.Sscanf(input, "%d", &choice)
if err != nil || choice < 1 || choice > len(dbFiles) {
fmt.Println("Invalid choice.")
return
}

selectedDB := dbFiles[choice-1].Rfilename
baseURL := "https://huggingface.co/datasets/eja/wikilite/resolve/main/"

if setupFileExists("wikilite.db") {
fmt.Println("A wikilite.db already exists in the current directory.")
return
}

fmt.Println("Downloading and extracting", selectedDB)
err = setupGunzipFile(baseURL+selectedDB, "wikilite.db")
if err != nil {
fmt.Println("Error downloading and extracting file:", err)
return
}
fmt.Println("Saved as wikilite.db")

ggufFile := setupGetGGUFFileName(selectedDB)
if ggufFile != "" {
if setupFileExists(ggufFile) {
fmt.Printf("%s already exists in the current directory.\n", ggufFile)
return
}

fmt.Println("Downloading corresponding gguf model:", ggufFile)
err = setupDownloadFile(baseURL+ggufFile, ggufFile)
if err != nil {
fmt.Println("Error downloading .gguf file:", err)
return
}
fmt.Println("Saved as", ggufFile)
}
}

0 comments on commit 693c6bf

Please sign in to comment.