-
Notifications
You must be signed in to change notification settings - Fork 108
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: blockstore: GetMany blockstore method #492
base: main
Are you sure you want to change the base?
Changes from 10 commits
34ddb5e
4ac2f26
d606bde
021f60a
e87fea2
a9aed04
8c22079
cfa2d14
6a6fa0d
bf9dcda
860cb76
b3ed048
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ package blockstore | |
import ( | ||
"context" | ||
"errors" | ||
"fmt" | ||
"sync" | ||
"sync/atomic" | ||
|
||
|
@@ -64,6 +65,12 @@ type Blockstore interface { | |
HashOnRead(enabled bool) | ||
} | ||
|
||
// GetManyBlockstore is a blockstore interface that supports a GetMany method | ||
type GetManyBlockstore interface { | ||
Blockstore | ||
GetMany(context.Context, []cid.Cid) ([]blocks.Block, []cid.Cid, error) | ||
} | ||
|
||
// Viewer can be implemented by blockstores that offer zero-copy access to | ||
// values. | ||
// | ||
|
@@ -310,6 +317,227 @@ func (bs *blockstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { | |
return output, nil | ||
} | ||
|
||
// GetManyOption is a functional option applied to a getManyBlockStore while | ||
// NewGetManyBlockstore constructs it. | ||
type GetManyOption struct { | ||
// f mutates the store under construction; NewGetManyBlockstore calls it | ||
// once for each option it is given. | ||
f func(bs *getManyBlockStore) | ||
} | ||
|
||
// NewGetManyBlockstore returns a default GetManyBlockstore implementation | ||
// using the provided datastore.TxnDatastore backend. | ||
func NewGetManyBlockstore(d ds.TxnDatastore, opts ...GetManyOption) GetManyBlockstore { | ||
Jorropo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
bs := &getManyBlockStore{ | ||
datastore: d, | ||
} | ||
|
||
for _, o := range opts { | ||
o.f(bs) | ||
} | ||
|
||
if !bs.noPrefix { | ||
bs.datastore = dsns.WrapTxnDatastore(bs.datastore, BlockPrefix) | ||
} | ||
return bs | ||
} | ||
|
||
// getManyBlockStore is the GetManyBlockstore implementation returned by | ||
// NewGetManyBlockstore; it keeps its blocks in a transactional datastore. | ||
type getManyBlockStore struct { | ||
// datastore holds the blocks. NewGetManyBlockstore wraps it with the | ||
// BlockPrefix namespace unless noPrefix is set. | ||
datastore ds.TxnDatastore | ||
|
||
// rehash is toggled by HashOnRead; when set, Get and GetMany recompute each | ||
// block's hash and compare it with the requested CID. | ||
rehash atomic.Bool | ||
// writeThrough, when false, makes Put and PutMany skip blocks that the | ||
// datastore already reports as present. | ||
writeThrough bool | ||
// noPrefix, when true, stops NewGetManyBlockstore from wrapping the | ||
// datastore with the BlockPrefix namespace. | ||
noPrefix bool | ||
} | ||
|
||
// HashOnRead turns hash verification on read on or off by atomically storing | ||
// the flag that Get and GetMany load before returning a block. | ||
func (bs *getManyBlockStore) HashOnRead(enabled bool) { | ||
bs.rehash.Store(enabled) | ||
} | ||
|
||
func (bs *getManyBlockStore) Get(ctx context.Context, k cid.Cid) (blocks.Block, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am surprised this function needs to be duplicated, I think you could use embeding here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is possible but would require passing in a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Another option would be to break the normal There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let me know what you think the best/cleanest approach is here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would make it take the union of batching and txn datastores so it has access to all that is needed.
https://pkg.go.dev/github.com/ipfs/go-datastore#Batching So if you accept a TxnDatastore you can use a simple stub that implements batches using transactions, this would make thee code work in the only place it's used right now. |
||
if !k.Defined() { | ||
logger.Error("undefined cid in blockstore") | ||
return nil, ipld.ErrNotFound{Cid: k} | ||
} | ||
bdata, err := bs.datastore.Get(ctx, dshelp.MultihashToDsKey(k.Hash())) | ||
if err == ds.ErrNotFound { | ||
return nil, ipld.ErrNotFound{Cid: k} | ||
} | ||
if err != nil { | ||
return nil, err | ||
} | ||
if bs.rehash.Load() { | ||
rbcid, err := k.Prefix().Sum(bdata) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
if !rbcid.Equals(k) { | ||
return nil, ErrHashMismatch | ||
} | ||
|
||
return blocks.NewBlockWithCid(bdata, rbcid) | ||
} | ||
return blocks.NewBlockWithCid(bdata, k) | ||
} | ||
|
||
func (bs *getManyBlockStore) GetMany(ctx context.Context, cs []cid.Cid) ([]blocks.Block, []cid.Cid, error) { | ||
if len(cs) == 1 { | ||
// performance fast-path | ||
block, err := bs.Get(ctx, cs[0]) | ||
return []blocks.Block{block}, nil, err | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Doesn't make sense to not return the CID here given it's in the signature, but also it doesn't seem like |
||
} | ||
|
||
t, err := bs.datastore.NewTransaction(ctx, false) | ||
if err != nil { | ||
return nil, nil, err | ||
} | ||
blks := make([]blocks.Block, 0, len(cs)) | ||
missingCIDs := make([]cid.Cid, 0, len(cs)) | ||
for _, c := range cs { | ||
if !c.Defined() { | ||
logger.Error("undefined cid in blockstore") | ||
return nil, nil, ipld.ErrNotFound{Cid: c} | ||
} | ||
bdata, err := t.Get(ctx, dshelp.MultihashToDsKey(c.Hash())) | ||
if err != nil { | ||
if err == ds.ErrNotFound { | ||
missingCIDs = append(missingCIDs, c) | ||
} else { | ||
return nil, nil, err | ||
} | ||
} else { | ||
if bs.rehash.Load() { | ||
rbcid, err := c.Prefix().Sum(bdata) | ||
if err != nil { | ||
return nil, nil, err | ||
} | ||
|
||
if !rbcid.Equals(c) { | ||
return nil, nil, fmt.Errorf("block in storage has different hash (%x) than requested (%x)", rbcid.Hash(), c.Hash()) | ||
} | ||
|
||
blk, err := blocks.NewBlockWithCid(bdata, rbcid) | ||
if err != nil { | ||
return nil, nil, err | ||
} | ||
|
||
blks = append(blks, blk) | ||
} else { | ||
blk, err := blocks.NewBlockWithCid(bdata, c) | ||
if err != nil { | ||
return nil, nil, err | ||
} | ||
|
||
blks = append(blks, blk) | ||
} | ||
} | ||
} | ||
return blks, missingCIDs, t.Commit(ctx) | ||
} | ||
|
||
func (bs *getManyBlockStore) Put(ctx context.Context, block blocks.Block) error { | ||
k := dshelp.MultihashToDsKey(block.Cid().Hash()) | ||
|
||
// Has is cheaper than Put, so see if we already have it | ||
if !bs.writeThrough { | ||
exists, err := bs.datastore.Has(ctx, k) | ||
if err == nil && exists { | ||
return nil // already stored. | ||
} | ||
} | ||
return bs.datastore.Put(ctx, k, block.RawData()) | ||
} | ||
|
||
func (bs *getManyBlockStore) PutMany(ctx context.Context, blocks []blocks.Block) error { | ||
if len(blocks) == 1 { | ||
// performance fast-path | ||
return bs.Put(ctx, blocks[0]) | ||
} | ||
|
||
t, err := bs.datastore.NewTransaction(ctx, false) | ||
if err != nil { | ||
return err | ||
} | ||
for _, b := range blocks { | ||
k := dshelp.MultihashToDsKey(b.Cid().Hash()) | ||
|
||
if !bs.writeThrough { | ||
exists, err := bs.datastore.Has(ctx, k) | ||
if err == nil && exists { | ||
continue | ||
} | ||
} | ||
|
||
err = t.Put(ctx, k, b.RawData()) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
return t.Commit(ctx) | ||
} | ||
|
||
func (bs *getManyBlockStore) Has(ctx context.Context, k cid.Cid) (bool, error) { | ||
return bs.datastore.Has(ctx, dshelp.MultihashToDsKey(k.Hash())) | ||
} | ||
|
||
func (bs *getManyBlockStore) GetSize(ctx context.Context, k cid.Cid) (int, error) { | ||
size, err := bs.datastore.GetSize(ctx, dshelp.MultihashToDsKey(k.Hash())) | ||
if err == ds.ErrNotFound { | ||
return -1, ipld.ErrNotFound{Cid: k} | ||
} | ||
return size, err | ||
} | ||
|
||
func (bs *getManyBlockStore) DeleteBlock(ctx context.Context, k cid.Cid) error { | ||
return bs.datastore.Delete(ctx, dshelp.MultihashToDsKey(k.Hash())) | ||
} | ||
|
||
// AllKeysChan runs a query for keys from the blockstore. | ||
// this is very simplistic, in the future, take dsq.Query as a param? | ||
// | ||
// AllKeysChan respects context. | ||
func (bs *getManyBlockStore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { | ||
|
||
// KeysOnly, because that would be _a lot_ of data. | ||
q := dsq.Query{KeysOnly: true} | ||
res, err := bs.datastore.Query(ctx, q) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
output := make(chan cid.Cid, dsq.KeysOnlyBufSize) | ||
go func() { | ||
defer func() { | ||
res.Close() // ensure exit (signals early exit, too) | ||
close(output) | ||
}() | ||
|
||
for { | ||
e, ok := res.NextSync() | ||
if !ok { | ||
return | ||
} | ||
if e.Error != nil { | ||
logger.Errorf("blockstore.AllKeysChan got err: %s", e.Error) | ||
return | ||
} | ||
|
||
// need to convert to key.Key using key.KeyFromDsKey. | ||
bk, err := dshelp.BinaryFromDsKey(ds.RawKey(e.Key)) | ||
if err != nil { | ||
logger.Warnf("error parsing key from binary: %s", err) | ||
continue | ||
} | ||
k := cid.NewCidV1(cid.Raw, bk) | ||
select { | ||
case <-ctx.Done(): | ||
return | ||
case output <- k: | ||
} | ||
} | ||
}() | ||
|
||
return output, nil | ||
} | ||
|
||
// NewGCLocker returns a default implementation of | ||
// GCLocker using standard [RW] mutexes. | ||
func NewGCLocker() GCLocker { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Two recommendations:
1. Returning both `[]blocks.Block` and `[]cid.Cid` looks redundant, since `Block` contains a `.Cid()` method: https://github.com/ipfs/go-block-format/blob/v0.2.0/blocks.go#L19-L25
2. Consider returning `(<-chan blocks.Block, error)` or `(<-chan BlockOption)`, depending on how you want errors returned.

If returning an asynchronous object (e.g. a channel or an iterator), it might be worth taking a look at ipfs/kubo#4592 to make sure you don't run into some common pitfalls. With Go generics now available, iterators may also be easier to write than they used to be.