From a9b843dc602d3ba8944b8dea673e302549f1d41c Mon Sep 17 00:00:00 2001 From: FMJdev Date: Wed, 9 Oct 2024 07:25:32 +0200 Subject: [PATCH] refactor(kv): finish `kv` package --- pkg/helpers/primitives.go | 12 +++ pkg/kv/format.go | 105 --------------------- pkg/kv/kv.go | 88 ++++++++--------- pkg/kv/op.go | 192 ++++++++++++++++++++++++++++++++++---- pkg/kv/types.go | 88 ++++++++++------- 5 files changed, 276 insertions(+), 209 deletions(-) delete mode 100644 pkg/kv/format.go diff --git a/pkg/helpers/primitives.go b/pkg/helpers/primitives.go index c3adaf9..e11fe67 100644 --- a/pkg/helpers/primitives.go +++ b/pkg/helpers/primitives.go @@ -1,5 +1,17 @@ package helpers +import "unicode" + +func OnlyLetters(s string) bool { + for _, r := range s { + if !unicode.IsLetter(r) { + return false + } + } + + return true +} + func StrPtr(s string) *string { return &s } diff --git a/pkg/kv/format.go b/pkg/kv/format.go deleted file mode 100644 index 9c90f11..0000000 --- a/pkg/kv/format.go +++ /dev/null @@ -1,105 +0,0 @@ -package kv - -import ( - "encoding/json" - "fmt" - "gopkg.in/yaml.v3" - "strings" -) - -type Format int - -const ( - RAW Format = iota + 1 - JSON - YAML -) - -// String implements the Stringer interface -func (e Format) String() string { - return [...]string{"json", "yaml"}[e-1] -} - -// Index makes the index of the Environment retrievable -func (e Format) Index() int { - return int(e) -} - -// IsRAW utility function to quickly determine the format -func (e Format) IsRAW() bool { - return e == RAW -} - -// IsJSON utility function to quickly determine the format -func (e Format) IsJSON() bool { - return e == JSON -} - -// IsYAML utility function to quickly determine the format -func (e Format) IsYAML() bool { - return e == YAML -} - -// FormatFromString matches a string value for a known Format returns the -// corresponding Format. 
If an invalid Format is given, it returns -// YAML along is a non-nil error -func FormatFromString(s string) (Format, error) { - switch strings.ToLower(s) { - case JSON.String(): - return JSON, nil - case YAML.String(): - return YAML, nil - case RAW.String(): - return RAW, nil - default: - return RAW, fmt.Errorf("invalid file format: %s", s) - } -} - -// Formatter is an object that knows how to format string or byte-slice input -type Formatter interface { - Format(input string) (string, error) - FormatBytes(input []byte) ([]byte, error) -} - -// JSONFormatter implements the JSON format -type JSONFormatter struct{} - -func (jf JSONFormatter) Format(input string) (string, error) { - js, err := json.Marshal(input) - if err != nil { - return "", err - } - - return string(js), nil -} - -func (jf JSONFormatter) FormatBytes(input []byte) ([]byte, error) { - js, err := json.Marshal(input) - if err != nil { - return nil, err - } - - return js, nil -} - -// YAMLFormatter implements the JSON format -type YAMLFormatter struct{} - -func (yf YAMLFormatter) Format(input string) (string, error) { - ya, err := yaml.Marshal(input) - if err != nil { - return "", err - } - - return string(ya), nil -} - -func (yf YAMLFormatter) FormatBytes(input []byte) ([]byte, error) { - ya, err := yaml.Marshal(input) - if err != nil { - return nil, err - } - - return ya, nil -} diff --git a/pkg/kv/kv.go b/pkg/kv/kv.go index 584d74e..9c60b12 100644 --- a/pkg/kv/kv.go +++ b/pkg/kv/kv.go @@ -7,6 +7,8 @@ import ( "context" "fmt" "github.com/dgraph-io/badger/v4" + "github.com/fmjstudios/gopskit/pkg/fs" + "os" "sync" ) @@ -17,14 +19,15 @@ func New(path string, opts ...Opt) (*Database, error) { bOpts := badger.DefaultOptions(path) db := &Database{ - kv: nil, - conf: bOpts, - path: path, - ns: DefaultNamespaces, - opt: DefaultOperation(), - ctx: ctx, - cancel: cancel, - lock: sync.Mutex{}, + kv: nil, + options: bOpts, + path: path, + namespaces: DefaultNamespaces, + currentNamespace: DefaultNamespaces[0], + 
discardRatio: DefaultDiscardRatio, + ctx: ctx, + cancel: cancel, + lock: sync.Mutex{}, } // (re)-configure @@ -38,8 +41,18 @@ func New(path string, opts ...Opt) (*Database, error) { } wg.Wait() + // create path if it does not exist + exists := fs.CheckIfExists(path) + if !exists { + err := os.MkdirAll(path, 0755) + if err != nil { + defer cancel() + return nil, err + } + } + // db - bdb, err := badger.Open(db.conf) + bdb, err := badger.Open(db.options) if err != nil { return nil, fmt.Errorf("could not open badger database: %w", err) } @@ -48,35 +61,14 @@ func New(path string, opts ...Opt) (*Database, error) { return db, nil } -// DefaultOperation set's the initial default Operation values -func DefaultOperation() *Operation { - return &Operation{ - namespace: DefaultNamespaces[0], - format: YAML, - lock: sync.Mutex{}, - } -} - // SetNamespace ... func (d *Database) SetNamespace(ns string) { d.lock.Lock() defer d.lock.Unlock() - d.opt.namespace = ns -} - -// SetFormat ... -func (d *Database) SetFormat(format Format) { - d.lock.Lock() - defer d.lock.Unlock() - - d.opt.format = format + d.currentNamespace = ns } -// -// Instantiation Options -// - // WithPath ... func WithPath(path string) Opt { return func(d *Database) { @@ -87,13 +79,13 @@ func WithPath(path string) Opt { } } -// WithExtraNamespaces ... -func WithExtraNamespaces(namespaces ...string) Opt { +// WithNamespaces ... +func WithNamespaces(namespaces ...string) Opt { return func(d *Database) { d.lock.Lock() defer d.lock.Unlock() - d.ns = append(d.ns, namespaces...) + d.namespaces = append(d.namespaces, namespaces...) } } @@ -103,7 +95,7 @@ func WithBadgerOptions(opts badger.Options) Opt { d.lock.Lock() defer d.lock.Unlock() - d.conf = opts + d.options = opts } } @@ -119,26 +111,22 @@ func WithContext(ctx context.Context) Opt { } } -// -// Operation Options -// - // WithNamespace ... 
-func WithNamespace(namespace string) OperationOpt { - return func(o *Operation) { - o.lock.Lock() - defer o.lock.Unlock() +func WithNamespace(namespace string) Opt { + return func(d *Database) { + d.lock.Lock() + defer d.lock.Unlock() - o.namespace = namespace + d.currentNamespace = namespace } } -// WithFormat ... -func WithFormat(format Format) OperationOpt { - return func(o *Operation) { - o.lock.Lock() - defer o.lock.Unlock() +// WithDiscardRatio ... +func WithDiscardRatio(ratio float64) Opt { + return func(d *Database) { + d.lock.Lock() + defer d.lock.Unlock() - o.format = format + d.discardRatio = ratio } } diff --git a/pkg/kv/op.go b/pkg/kv/op.go index 5343900..19e4ccd 100644 --- a/pkg/kv/op.go +++ b/pkg/kv/op.go @@ -1,41 +1,59 @@ package kv import ( + "fmt" "github.com/dgraph-io/badger/v4" + "github.com/fmjstudios/gopskit/pkg/helpers" "golang.org/x/sync/errgroup" + "strings" + "time" ) -// Get retrieves the value at a specific key from the database -func (d *Database) Get(key string) (value string, err error) { - buf := make([]byte, 0) - keyB := []byte(key) - g := new(errgroup.Group) +// enforce implementation of the interface +var _ Store = (*Database)(nil) + +// Get implements the Store interface for Database. It retrieves a key from the database with +// the given OperationOpt options to configure the current operation. The operation itself is +// handled asynchronously, although the method itself is also thread-safe. +func (d *Database) Get(key string) (value []byte, err error) { + // ensure we're getting clean keys + err = d.ensureNonNamespaced(key) + if err != nil { + return nil, err + } + k := []byte(key) + var bytes []byte + g := new(errgroup.Group) g.Go(func() error { - value, err := d.get(keyB) + value, err := d.get(k) if err != nil { return err } - buf = append(buf, value...) + bytes = append(bytes, value...) 
return nil }) err = g.Wait() if err != nil { - return "", err + return nil, err } - return string(buf), nil + return bytes, nil } +// get is the actual implementation of the retrieval of a value from the BadgerDB +// database. The key itself is namespaced beforehand to allow for multiple data +// models to be persisted simultaneously. func (d *Database) get(key []byte) (value []byte, err error) { d.lock.Lock() defer d.lock.Unlock() - buf := make([]byte, 0) + var bytes []byte + k := d.namespace(d.currentNamespace, string(key)) err = d.kv.View(func(txn *badger.Txn) error { - item, err := txn.Get(key) + item, err := txn.Get(k) if err != nil { return err } @@ -45,7 +63,7 @@ func (d *Database) get(key []byte) (value []byte, err error) { return err } - buf = append(buf, value...) + bytes = append(bytes, value...) return nil }) @@ -53,16 +71,22 @@ func (d *Database) get(key []byte) (value []byte, err error) { return nil, err } - return buf, nil + return bytes, nil } -// Set sets the value at a specific key within the database +// Set implements the Store interface for Database. It sets a key within the database to +// a certain value with the given OperationOpt options to configure the current operation. +// The operation itself is handled asynchronously, although the method itself is also thread-safe. func (d *Database) Set(key string, value []byte) error { - keyB := []byte(key) - g := new(errgroup.Group) + err := d.ensureNonNamespaced(key) + if err != nil { + return err + } + k := []byte(key) + g := new(errgroup.Group) g.Go(func() error { - err := d.set(keyB, value) + err := d.set(k, value) if err != nil { return err } @@ -70,7 +94,7 @@ func (d *Database) Set(key string, value []byte) error { return nil }) - err := g.Wait() + err = g.Wait() if err != nil { return err } @@ -78,13 +102,80 @@ func (d *Database) Set(key string, value []byte) error { return nil } +// get is the actual implementation of setting a value within the BadgerDB +// database. 
The key itself is namespaced beforehand to allow for multiple data +// models to be persisted simultaneously. func (d *Database) set(key, value []byte) error { d.lock.Lock() defer d.lock.Unlock() + k := d.namespace(d.currentNamespace, string(key)) + var err error + err = d.kv.Update(func(txn *badger.Txn) error { + err := txn.Set(k, value) + if err != nil { + return err + } + + return nil + }) + + if err != nil { + return err + } + + return nil +} + +// Has checks if the database contains a key by trying to read the value at the +// given key via the use of Get. If the method returns an error it is determined +// that the value does not exist and the error from Get is propagated alongside +// a false return value. Otherwise, it does and a nil-error is returned. +func (d *Database) Has(key string) (bool, error) { + err := d.ensureNonNamespaced(key) + if err != nil { + return false, err + } + k := []byte(key) + value, err := d.get(k) + if err != nil { + return false, err + } + + return len(value) > 0, nil +} + +// Namespaces returns the current namespaces the database has been initialized with. +// The function cannot error since by default only the (single) "default" namespace +// will be returned if the DB is otherwise largely unconfigured. 
+func (d *Database) Namespaces() []string { + d.lock.Lock() + defer d.lock.Unlock() + + return d.namespaces +} + +// Delete deletes a key from the database +func (d *Database) Delete(key string) error { + // the lock is taken by delete itself: sync.Mutex is not reentrant, so + // locking here as well would deadlock every call + + err := d.delete([]byte(key)) + if err != nil { + return err + } + + return nil +} + +func (d *Database) delete(key []byte) error { + d.lock.Lock() + defer d.lock.Unlock() + + k := d.namespace(d.currentNamespace, string(key)) var err error err = d.kv.Update(func(txn *badger.Txn) error { - err := txn.Set(key, value) + err := txn.Delete(k) if err != nil { return err } @@ -98,3 +189,66 @@ func (d *Database) set(key, value []byte) error { return nil } + +func (d *Database) Close() error { + d.lock.Lock() + defer d.lock.Unlock() + + doneC := make(chan struct{}, 1) + go d.gc(doneC) + <-doneC + + return d.kv.Close() +} + +func (d *Database) Path() string { + d.lock.Lock() + defer d.lock.Unlock() + + return d.path +} + +func (d *Database) Config() badger.Options { + d.lock.Lock() + defer d.lock.Unlock() + + return d.options +} + +// namespace namespaces a given key by prefixing the value with a namespace like "namespace/value". +// If an empty string is passed as the namespace, the DefaultNamespace "default" is used instead. +func (d *Database) namespace(namespace, key string) []byte { + if namespace == "" { + namespace = DefaultNamespace + } + + prefix := fmt.Sprintf("%s/", namespace) + return []byte(prefix + key) +} + +// ensureNonNamespaced ensures that a given key value contains no slashes and consists only of +// letters, thereby equating to a valid key. Any other key is rejected with an error. +func (d *Database) ensureNonNamespaced(key string) error { + if strings.Contains(key, "/") || !helpers.OnlyLetters(key) { + return fmt.Errorf("cannot set value for a namespaced key. please exclude namespaces from the key") + } + + return nil +} + +// gc runs the garbage-collection for the BadgerDB which saves filesystem space. 
It is run +// as a goroutine before closing the database connection. +func (d *Database) gc(doneChan chan struct{}) { + // bound the pass so that Close cannot stall on a very large value log + deadline := time.Now().Add(5 * time.Minute) + + for time.Now().Before(deadline) { + // a non-nil error (e.g. badger.ErrNoRewrite) means nothing is left to + // reclaim; waiting on a ticker here would never reach the send below + if err := d.kv.RunValueLogGC(d.discardRatio); err != nil { + break + } + } + + doneChan <- struct{}{} +} diff --git a/pkg/kv/types.go b/pkg/kv/types.go index 085c760..3d9a98d 100644 --- a/pkg/kv/types.go +++ b/pkg/kv/types.go @@ -8,8 +8,9 @@ import ( ) const ( - DefaultKVName = "store.db" - DefaultNamespace = "access" + DefaultKVName = "store" + DefaultNamespace = "default" + DefaultDiscardRatio = 0.7 ) var ( @@ -19,26 +20,32 @@ var ( // Database is the central type for the persisted Key-Value database type Database struct { - // Implementation - // - // kv is the underlying BadgerDB key-value store + // kv is the underlying BadgerDB key-value database, which we lock to run in an + // on-disk persistent mode which our CLI's can utilize. kv *badger.DB - // conf refers to the latest 'version' of the current badger.Options for kv - conf badger.Options + // options refers to a copy of the badger.Options with which the current instance + // badger.DB was initialized. This mainly provided for later reference rather than + // (re)-configuration. + options badger.Options // path is the filesystem path the database is persisted to path string - // ns represents the namespaces managed by the Database - ns []string + // namespaces are the namespaces managed by the Database. These have to be + // registered with the database at initialization using WithNamespace. By default, + // the database uses a single namespace "default". + namespaces []string - // opt is the currently used reference to an Operation object which configures - // the respective database Operation like Get, Set, Has, etc... - opt *Operation + // currentNamespace is the currently used namespace to write keys to. Since every + // database operation like Get, Set, etc. 
needs a namespace we use the "default" + // namespace if no other is provided. + currentNamespace string + + // discardRatio is the BadgerDB configuration value used to determine when a + // file can be rewritten. By default, a DefaultDiscardRatio of 0.7 will be used. + discardRatio float64 - // Asynchronicity - // // ctx represents the embedded database context to cancel operations ctx context.Context @@ -50,32 +57,43 @@ type Database struct { lock sync.Mutex } -// Operation is the configuration object for database operations -type Operation struct { - // namespace represents the namespace to operate on within the current database - namespace string - - // format is the output Format to use for the specific value - format Format - - // lock is a Mutex which ensures that only one goroutine may modify the configuration - // at a time - lock sync.Mutex -} - type Store interface { - Get(key string, opts ...OperationOpt) (value []byte, err error) - Set(key string, value []byte, opts ...OperationOpt) error - Has(key string, opts ...OperationOpt) (contains bool, err error) - Namespaces(opts ...OperationOpt) (namespaces []string, err error) - Delete(key string, opts ...OperationOpt) error + // Get retrieves the value of a given key within the current namespace from + // the BadgerDB database. If a different namespace is to be checked + // SetNamespace must be used beforehand. + Get(key string) (value []byte, err error) + + // Set updates or sets the value for a given key within the current namespace. + // It takes the value as a byte-slice instead of a string or a different type + // to offer more versatility. If a different namespace is to be checked + // SetNamespace must be used beforehand. + Set(key string, value []byte) error + + // Has checks whether a database key exists within the current namespace. If + // a different namespace is to be checked SetNamespace must be used beforehand. 
+ Has(key string) (contains bool, err error) + + // Namespaces lists all currently registered namespaces in the Database + Namespaces() []string + + // Delete removes a key within the current namespace along with all of its data. + // This is a non-recoverable operation. If a different namespace is to be targeted + // SetNamespace must be used beforehand. + Delete(key string) error + + // Close closes the underlying connection to the BadgerDB database. This would + // preferably be used with a `defer` statement in the main function since the + // database may be needed for the entire duration of the program. Close() error + + // Path returns the filesystem path the database was initialized for. This value is + // equal to the options.Dir and options.ValueDir fields. Path() string + + // Config returns the entire configuration with which the BadgerDB database was + // initialized. Config() badger.Options } // Opt is a configuration option for the newly initialized Key-Value database type Opt func(k *Database) - -// OperationOpt is configuration option for database commands like Get, Set, etc. -type OperationOpt func(o *Operation)