From 25ca34923f0374e38a0b0ebd1eecaccc4452f88d Mon Sep 17 00:00:00 2001 From: Fabio Bozzo Date: Wed, 13 Nov 2024 18:03:00 +0100 Subject: [PATCH] feat(container): versioning for the CBOR container --- pkg/container/car_test.go | 7 +++++++ pkg/container/reader.go | 33 +++++++++++++++++++++++++++------ pkg/container/serial_test.go | 27 +++++++++++++++++++++++++++ pkg/container/writer.go | 12 ++++++++---- 4 files changed, 69 insertions(+), 10 deletions(-) diff --git a/pkg/container/car_test.go b/pkg/container/car_test.go index 80f4b5d..5b20b8e 100644 --- a/pkg/container/car_test.go +++ b/pkg/container/car_test.go @@ -40,6 +40,13 @@ func TestCarRoundTrip(t *testing.T) { } func FuzzCarRoundTrip(f *testing.F) { + // Note: this fuzzing is somewhat broken. + // After some time, the fuzzer discovers that a varint can be serialized in different + // ways that lead to the same integer value. This means that the CAR format can have + // multiple legal binary representations for the exact same data, which is what we are + // trying to detect here. Ideally, the format would be stricter, but that's how things + // are.
+ example, err := os.ReadFile("testdata/sample-v1.car") require.NoError(f, err) diff --git a/pkg/container/reader.go b/pkg/container/reader.go index db1e145..8c29fb1 100644 --- a/pkg/container/reader.go +++ b/pkg/container/reader.go @@ -98,15 +98,36 @@ func FromCbor(r io.Reader) (Reader, error) { if err != nil { return nil, err } - if n.Kind() != datamodel.Kind_List { - return nil, fmt.Errorf("not a list") + if n.Kind() != datamodel.Kind_Map { + return nil, fmt.Errorf("invalid container format: expected map") + } + if n.Length() != 1 { + return nil, fmt.Errorf("invalid container format: expected single version key") + } + + // get the first (and only) key-value pair + it := n.MapIterator() + key, tokensNode, err := it.Next() + if err != nil { + return nil, err } - ctn := make(Reader, n.Length()) + version, err := key.AsString() + if err != nil { + return nil, fmt.Errorf("invalid container format: version must be string") + } + if version != currentContainerVersion { + return nil, fmt.Errorf("unsupported container version: %s", version) + } + + if tokensNode.Kind() != datamodel.Kind_List { + return nil, fmt.Errorf("invalid container format: tokens must be a list") + } - it := n.ListIterator() - for !it.Done() { - _, val, err := it.Next() + ctn := make(Reader, tokensNode.Length()) + it2 := tokensNode.ListIterator() + for !it2.Done() { + _, val, err := it2.Next() if err != nil { return nil, err } diff --git a/pkg/container/serial_test.go b/pkg/container/serial_test.go index fea2efe..36560b0 100644 --- a/pkg/container/serial_test.go +++ b/pkg/container/serial_test.go @@ -176,3 +176,30 @@ func randToken() (*delegation.Token, cid.Cid, []byte) { } return t, c, b } + +func FuzzContainerRead(f *testing.F) { + // Generate a corpus + for tokenCount := 0; tokenCount < 10; tokenCount++ { + writer := NewWriter() + for i := 0; i < tokenCount; i++ { + _, c, data := randToken() + writer.AddSealed(c, data) + } + buf := bytes.NewBuffer(nil) + err := writer.ToCbor(buf) + 
require.NoError(f, err) + + f.Add(buf.Bytes()) + } + + f.Fuzz(func(t *testing.T, data []byte) { + start := time.Now() + + // search for panics + _, _ = FromCbor(bytes.NewReader(data)) + + if time.Since(start) > 100*time.Millisecond { + panic("too long") + } + }) +} diff --git a/pkg/container/writer.go b/pkg/container/writer.go index 310fa8a..e8d8c32 100644 --- a/pkg/container/writer.go +++ b/pkg/container/writer.go @@ -14,6 +14,8 @@ import ( // TODO: should we have a multibase to wrap the cbor? but there is no reader/write in go-multibase :-( +const currentContainerVersion = "ctn-v1" + // Writer is a token container writer. It provides a convenient way to aggregate and serialize tokens together. type Writer map[cid.Cid][]byte @@ -43,10 +45,12 @@ func (ctn Writer) ToCarBase64(w io.Writer) error { } func (ctn Writer) ToCbor(w io.Writer) error { - node, err := qp.BuildList(basicnode.Prototype.Any, int64(len(ctn)), func(la datamodel.ListAssembler) { - for _, bytes := range ctn { - qp.ListEntry(la, qp.Bytes(bytes)) - } + node, err := qp.BuildMap(basicnode.Prototype.Any, 1, func(ma datamodel.MapAssembler) { + qp.MapEntry(ma, currentContainerVersion, qp.List(int64(len(ctn)), func(la datamodel.ListAssembler) { + for _, bytes := range ctn { + qp.ListEntry(la, qp.Bytes(bytes)) + } + })) }) if err != nil { return err