diff --git a/api/gen/proto/go/metastore/v1/types.pb.go b/api/gen/proto/go/metastore/v1/types.pb.go index aef37a29c1..6df3108ee6 100644 --- a/api/gen/proto/go/metastore/v1/types.pb.go +++ b/api/gen/proto/go/metastore/v1/types.pb.go @@ -7,7 +7,6 @@ package metastorev1 import ( - v1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" reflect "reflect" @@ -89,17 +88,23 @@ type BlockMeta struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - FormatVersion uint64 `protobuf:"varint,1,opt,name=format_version,json=formatVersion,proto3" json:"format_version,omitempty"` - Id string `protobuf:"bytes,2,opt,name=id,proto3" json:"id,omitempty"` - MinTime int64 `protobuf:"varint,3,opt,name=min_time,json=minTime,proto3" json:"min_time,omitempty"` - MaxTime int64 `protobuf:"varint,4,opt,name=max_time,json=maxTime,proto3" json:"max_time,omitempty"` - Shard uint32 `protobuf:"varint,5,opt,name=shard,proto3" json:"shard,omitempty"` - CompactionLevel uint32 `protobuf:"varint,6,opt,name=compaction_level,json=compactionLevel,proto3" json:"compaction_level,omitempty"` + FormatVersion uint32 `protobuf:"varint,1,opt,name=format_version,json=formatVersion,proto3" json:"format_version,omitempty"` + // Block ID is a unique identifier for the block. + // This is the only field that is not included into + // the string table. + Id string `protobuf:"bytes,2,opt,name=id,proto3" json:"id,omitempty"` // Optional. Empty if compaction level is 0. - TenantId string `protobuf:"bytes,7,opt,name=tenant_id,json=tenantId,proto3" json:"tenant_id,omitempty"` - Datasets []*Dataset `protobuf:"bytes,8,rep,name=datasets,proto3" json:"datasets,omitempty"` - Size uint64 `protobuf:"varint,9,opt,name=size,proto3" json:"size,omitempty"` - CreatedBy string `protobuf:"bytes,10,opt,name=created_by,json=createdBy,proto3" json:"created_by,omitempty"` + Tenant int32 `protobuf:"varint,3,opt,name=tenant,proto3" json:"tenant,omitempty"` + Shard uint32 `protobuf:"varint,4,opt,name=shard,proto3" json:"shard,omitempty"` + CompactionLevel uint32 `protobuf:"varint,5,opt,name=compaction_level,json=compactionLevel,proto3" json:"compaction_level,omitempty"` + MinTime int64 `protobuf:"varint,6,opt,name=min_time,json=minTime,proto3" json:"min_time,omitempty"` + MaxTime int64 `protobuf:"varint,7,opt,name=max_time,json=maxTime,proto3" json:"max_time,omitempty"` + CreatedBy int32 `protobuf:"varint,8,opt,name=created_by,json=createdBy,proto3" json:"created_by,omitempty"` + Size uint64 `protobuf:"varint,9,opt,name=size,proto3" json:"size,omitempty"` + Datasets []*Dataset `protobuf:"bytes,10,rep,name=datasets,proto3" json:"datasets,omitempty"` + // String table contains strings of the block. + // By convention, the first string is always an empty string. 
+ StringTable []string `protobuf:"bytes,11,rep,name=string_table,json=stringTable,proto3" json:"string_table,omitempty"` } func (x *BlockMeta) Reset() { @@ -134,7 +139,7 @@ func (*BlockMeta) Descriptor() ([]byte, []int) { return file_metastore_v1_types_proto_rawDescGZIP(), []int{1} } -func (x *BlockMeta) GetFormatVersion() uint64 { +func (x *BlockMeta) GetFormatVersion() uint32 { if x != nil { return x.FormatVersion } @@ -148,46 +153,46 @@ func (x *BlockMeta) GetId() string { return "" } -func (x *BlockMeta) GetMinTime() int64 { +func (x *BlockMeta) GetTenant() int32 { if x != nil { - return x.MinTime + return x.Tenant } return 0 } -func (x *BlockMeta) GetMaxTime() int64 { +func (x *BlockMeta) GetShard() uint32 { if x != nil { - return x.MaxTime + return x.Shard } return 0 } -func (x *BlockMeta) GetShard() uint32 { +func (x *BlockMeta) GetCompactionLevel() uint32 { if x != nil { - return x.Shard + return x.CompactionLevel } return 0 } -func (x *BlockMeta) GetCompactionLevel() uint32 { +func (x *BlockMeta) GetMinTime() int64 { if x != nil { - return x.CompactionLevel + return x.MinTime } return 0 } -func (x *BlockMeta) GetTenantId() string { +func (x *BlockMeta) GetMaxTime() int64 { if x != nil { - return x.TenantId + return x.MaxTime } - return "" + return 0 } -func (x *BlockMeta) GetDatasets() []*Dataset { +func (x *BlockMeta) GetCreatedBy() int32 { if x != nil { - return x.Datasets + return x.CreatedBy } - return nil + return 0 } func (x *BlockMeta) GetSize() uint64 { @@ -197,11 +202,18 @@ func (x *BlockMeta) GetSize() uint64 { return 0 } -func (x *BlockMeta) GetCreatedBy() string { +func (x *BlockMeta) GetDatasets() []*Dataset { if x != nil { - return x.CreatedBy + return x.Datasets } - return "" + return nil +} + +func (x *BlockMeta) GetStringTable() []string { + if x != nil { + return x.StringTable + } + return nil } type Dataset struct { @@ -209,10 +221,10 @@ type Dataset struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - TenantId string `protobuf:"bytes,1,opt,name=tenant_id,json=tenantId,proto3" json:"tenant_id,omitempty"` - Name string `protobuf:"bytes,2,opt,name=name,proto3" json:"name,omitempty"` - MinTime int64 `protobuf:"varint,3,opt,name=min_time,json=minTime,proto3" json:"min_time,omitempty"` - MaxTime int64 `protobuf:"varint,4,opt,name=max_time,json=maxTime,proto3" json:"max_time,omitempty"` + Tenant int32 `protobuf:"varint,1,opt,name=tenant,proto3" json:"tenant,omitempty"` + Name int32 `protobuf:"varint,2,opt,name=name,proto3" json:"name,omitempty"` + MinTime int64 `protobuf:"varint,3,opt,name=min_time,json=minTime,proto3" json:"min_time,omitempty"` + MaxTime int64 `protobuf:"varint,4,opt,name=max_time,json=maxTime,proto3" json:"max_time,omitempty"` // Table of contents lists data sections within the tenant // service region. The offsets are absolute. // @@ -226,8 +238,9 @@ type Dataset struct { Size uint64 `protobuf:"varint,6,opt,name=size,proto3" json:"size,omitempty"` // TODO: Delete. Use labels instead. // Profile types present in the tenant service data. - ProfileTypes []string `protobuf:"bytes,7,rep,name=profile_types,json=profileTypes,proto3" json:"profile_types,omitempty"` - Labels []*v1.Labels `protobuf:"bytes,8,rep,name=labels,proto3" json:"labels,omitempty"` + ProfileTypes []int32 `protobuf:"varint,7,rep,packed,name=profile_types,json=profileTypes,proto3" json:"profile_types,omitempty"` + // Length prefixed label key-value pairs. 
+ Labels []int32 `protobuf:"varint,8,rep,packed,name=labels,proto3" json:"labels,omitempty"` } func (x *Dataset) Reset() { @@ -262,18 +275,18 @@ func (*Dataset) Descriptor() ([]byte, []int) { return file_metastore_v1_types_proto_rawDescGZIP(), []int{2} } -func (x *Dataset) GetTenantId() string { +func (x *Dataset) GetTenant() int32 { if x != nil { - return x.TenantId + return x.Tenant } - return "" + return 0 } -func (x *Dataset) GetName() string { +func (x *Dataset) GetName() int32 { if x != nil { return x.Name } - return "" + return 0 } func (x *Dataset) GetMinTime() int64 { @@ -304,14 +317,14 @@ func (x *Dataset) GetSize() uint64 { return 0 } -func (x *Dataset) GetProfileTypes() []string { +func (x *Dataset) GetProfileTypes() []int32 { if x != nil { return x.ProfileTypes } return nil } -func (x *Dataset) GetLabels() []*v1.Labels { +func (x *Dataset) GetLabels() []int32 { if x != nil { return x.Labels } @@ -323,62 +336,61 @@ var File_metastore_v1_types_proto protoreflect.FileDescriptor var file_metastore_v1_types_proto_rawDesc = []byte{ 0x0a, 0x18, 0x6d, 0x65, 0x74, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2f, 0x76, 0x31, 0x2f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0c, 0x6d, 0x65, 0x74, 0x61, - 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x1a, 0x14, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2f, - 0x76, 0x31, 0x2f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x51, - 0x0a, 0x09, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x4c, 0x69, 0x73, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x74, - 0x65, 0x6e, 0x61, 0x6e, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x74, 0x65, 0x6e, - 0x61, 0x6e, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x68, 0x61, 0x72, 0x64, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x0d, 0x52, 0x05, 0x73, 0x68, 0x61, 0x72, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x62, 0x6c, 0x6f, - 0x63, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, 0x62, 0x6c, 0x6f, 0x63, 0x6b, - 0x73, 0x22, 0xbc, 0x02, 0x0a, 0x09, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x4d, 0x65, 0x74, 0x61, 0x12, - 0x25, 0x0a, 0x0e, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, - 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0d, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x56, - 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x69, 0x6e, 0x5f, 0x74, 0x69, - 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x07, 0x6d, 0x69, 0x6e, 0x54, 0x69, 0x6d, - 0x65, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x61, 0x78, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x04, 0x20, - 0x01, 0x28, 0x03, 0x52, 0x07, 0x6d, 0x61, 0x78, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x14, 0x0a, 0x05, - 0x73, 0x68, 0x61, 0x72, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x73, 0x68, 0x61, - 0x72, 0x64, 0x12, 0x29, 0x0a, 0x10, 0x63, 0x6f, 0x6d, 0x70, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, - 0x5f, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x63, 0x6f, - 0x6d, 0x70, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x12, 0x1b, 0x0a, - 0x09, 0x74, 0x65, 0x6e, 0x61, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x08, 0x74, 0x65, 0x6e, 0x61, 0x6e, 0x74, 0x49, 0x64, 0x12, 0x31, 0x0a, 0x08, 0x64, 0x61, - 0x74, 0x61, 0x73, 0x65, 0x74, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x6d, - 0x65, 0x74, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x44, 0x61, 0x74, 0x61, - 0x73, 0x65, 0x74, 0x52, 0x08, 0x64, 0x61, 0x74, 
0x61, 0x73, 0x65, 0x74, 0x73, 0x12, 0x12, 0x0a, - 0x04, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x09, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x73, 0x69, 0x7a, - 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x62, 0x79, 0x18, - 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x42, 0x79, - 0x22, 0xff, 0x01, 0x0a, 0x07, 0x44, 0x61, 0x74, 0x61, 0x73, 0x65, 0x74, 0x12, 0x1b, 0x0a, 0x09, - 0x74, 0x65, 0x6e, 0x61, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x08, 0x74, 0x65, 0x6e, 0x61, 0x6e, 0x74, 0x49, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, - 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x19, 0x0a, - 0x08, 0x6d, 0x69, 0x6e, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, - 0x07, 0x6d, 0x69, 0x6e, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x61, 0x78, 0x5f, - 0x74, 0x69, 0x6d, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x07, 0x6d, 0x61, 0x78, 0x54, - 0x69, 0x6d, 0x65, 0x12, 0x2a, 0x0a, 0x11, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x6f, 0x66, 0x5f, - 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x04, 0x52, 0x0f, - 0x74, 0x61, 0x62, 0x6c, 0x65, 0x4f, 0x66, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x73, 0x12, - 0x12, 0x0a, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x73, - 0x69, 0x7a, 0x65, 0x12, 0x23, 0x0a, 0x0d, 0x70, 0x72, 0x6f, 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x74, - 0x79, 0x70, 0x65, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x70, 0x72, 0x6f, 0x66, - 0x69, 0x6c, 0x65, 0x54, 0x79, 0x70, 0x65, 0x73, 0x12, 0x28, 0x0a, 0x06, 0x6c, 0x61, 0x62, 0x65, - 0x6c, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x10, 0x2e, 0x74, 0x79, 0x70, 0x65, 0x73, - 0x2e, 0x76, 0x31, 0x2e, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x52, 0x06, 0x6c, 0x61, 0x62, 0x65, - 0x6c, 0x73, 0x42, 0xb7, 0x01, 0x0a, 0x10, 0x63, 0x6f, 0x6d, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x73, - 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x42, 0x0a, 0x54, 0x79, 0x70, 0x65, 0x73, 0x50, 0x72, - 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x46, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, - 0x6d, 0x2f, 0x67, 0x72, 0x61, 0x66, 0x61, 0x6e, 0x61, 0x2f, 0x70, 0x79, 0x72, 0x6f, 0x73, 0x63, - 0x6f, 0x70, 0x65, 0x2f, 0x61, 0x70, 0x69, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x2f, 0x67, 0x6f, 0x2f, 0x6d, 0x65, 0x74, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2f, 0x76, - 0x31, 0x3b, 0x6d, 0x65, 0x74, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x76, 0x31, 0xa2, 0x02, 0x03, - 0x4d, 0x58, 0x58, 0xaa, 0x02, 0x0c, 0x4d, 0x65, 0x74, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, - 0x56, 0x31, 0xca, 0x02, 0x0c, 0x4d, 0x65, 0x74, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x5c, 0x56, - 0x31, 0xe2, 0x02, 0x18, 0x4d, 0x65, 0x74, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x5c, 0x56, 0x31, - 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, 0x0d, 0x4d, - 0x65, 0x74, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x3a, 0x3a, 0x56, 0x31, 0x62, 0x06, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x33, + 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x22, 0x51, 0x0a, 0x09, 0x42, 0x6c, 0x6f, 0x63, + 0x6b, 0x4c, 0x69, 0x73, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x65, 0x6e, 0x61, 0x6e, 0x74, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x74, 0x65, 0x6e, 0x61, 0x6e, 0x74, 0x12, 0x14, 0x0a, + 0x05, 0x73, 0x68, 0x61, 0x72, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x73, 0x68, + 0x61, 0x72, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x62, 
0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x18, 0x03, 0x20, + 0x03, 0x28, 0x09, 0x52, 0x06, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x22, 0xda, 0x02, 0x0a, 0x09, + 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x4d, 0x65, 0x74, 0x61, 0x12, 0x25, 0x0a, 0x0e, 0x66, 0x6f, 0x72, + 0x6d, 0x61, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0d, 0x52, 0x0d, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, + 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, + 0x12, 0x16, 0x0a, 0x06, 0x74, 0x65, 0x6e, 0x61, 0x6e, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x06, 0x74, 0x65, 0x6e, 0x61, 0x6e, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x68, 0x61, 0x72, + 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x73, 0x68, 0x61, 0x72, 0x64, 0x12, 0x29, + 0x0a, 0x10, 0x63, 0x6f, 0x6d, 0x70, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6c, 0x65, 0x76, + 0x65, 0x6c, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x63, 0x6f, 0x6d, 0x70, 0x61, 0x63, + 0x74, 0x69, 0x6f, 0x6e, 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x69, 0x6e, + 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x03, 0x52, 0x07, 0x6d, 0x69, 0x6e, + 0x54, 0x69, 0x6d, 0x65, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x61, 0x78, 0x5f, 0x74, 0x69, 0x6d, 0x65, + 0x18, 0x07, 0x20, 0x01, 0x28, 0x03, 0x52, 0x07, 0x6d, 0x61, 0x78, 0x54, 0x69, 0x6d, 0x65, 0x12, + 0x1d, 0x0a, 0x0a, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x62, 0x79, 0x18, 0x08, 0x20, + 0x01, 0x28, 0x05, 0x52, 0x09, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x42, 0x79, 0x12, 0x12, + 0x0a, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x09, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x73, 0x69, + 0x7a, 0x65, 0x12, 0x31, 0x0a, 0x08, 0x64, 0x61, 0x74, 0x61, 0x73, 0x65, 0x74, 0x73, 0x18, 0x0a, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, + 0x2e, 0x76, 0x31, 0x2e, 0x44, 0x61, 0x74, 0x61, 0x73, 0x65, 0x74, 0x52, 0x08, 0x64, 0x61, 0x74, + 0x61, 0x73, 0x65, 0x74, 0x73, 0x12, 0x21, 0x0a, 0x0c, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x5f, + 0x74, 0x61, 0x62, 0x6c, 0x65, 0x18, 0x0b, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x73, 0x74, 0x72, + 0x69, 0x6e, 0x67, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x22, 0xe8, 0x01, 0x0a, 0x07, 0x44, 0x61, 0x74, + 0x61, 0x73, 0x65, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x65, 0x6e, 0x61, 0x6e, 0x74, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x74, 0x65, 0x6e, 0x61, 0x6e, 0x74, 0x12, 0x12, 0x0a, 0x04, + 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, + 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x69, 0x6e, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x03, 0x52, 0x07, 0x6d, 0x69, 0x6e, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x19, 0x0a, 0x08, 0x6d, + 0x61, 0x78, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x07, 0x6d, + 0x61, 0x78, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x2a, 0x0a, 0x11, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5f, + 0x6f, 0x66, 0x5f, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, + 0x04, 0x52, 0x0f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x4f, 0x66, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x6e, + 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x04, + 0x52, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x12, 0x23, 0x0a, 0x0d, 0x70, 0x72, 0x6f, 0x66, 0x69, 0x6c, + 0x65, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, 0x05, 0x52, 0x0c, 0x70, + 0x72, 0x6f, 0x66, 0x69, 0x6c, 0x65, 0x54, 0x79, 0x70, 0x65, 0x73, 0x12, 
0x16, 0x0a, 0x06, 0x6c, + 0x61, 0x62, 0x65, 0x6c, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x6c, 0x61, 0x62, + 0x65, 0x6c, 0x73, 0x42, 0xb7, 0x01, 0x0a, 0x10, 0x63, 0x6f, 0x6d, 0x2e, 0x6d, 0x65, 0x74, 0x61, + 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x42, 0x0a, 0x54, 0x79, 0x70, 0x65, 0x73, 0x50, + 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x46, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, + 0x6f, 0x6d, 0x2f, 0x67, 0x72, 0x61, 0x66, 0x61, 0x6e, 0x61, 0x2f, 0x70, 0x79, 0x72, 0x6f, 0x73, + 0x63, 0x6f, 0x70, 0x65, 0x2f, 0x61, 0x70, 0x69, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x2f, 0x67, 0x6f, 0x2f, 0x6d, 0x65, 0x74, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2f, + 0x76, 0x31, 0x3b, 0x6d, 0x65, 0x74, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x76, 0x31, 0xa2, 0x02, + 0x03, 0x4d, 0x58, 0x58, 0xaa, 0x02, 0x0c, 0x4d, 0x65, 0x74, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, + 0x2e, 0x56, 0x31, 0xca, 0x02, 0x0c, 0x4d, 0x65, 0x74, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x5c, + 0x56, 0x31, 0xe2, 0x02, 0x18, 0x4d, 0x65, 0x74, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x5c, 0x56, + 0x31, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, 0x0d, + 0x4d, 0x65, 0x74, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x3a, 0x3a, 0x56, 0x31, 0x62, 0x06, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -398,16 +410,14 @@ var file_metastore_v1_types_proto_goTypes = []any{ (*BlockList)(nil), // 0: metastore.v1.BlockList (*BlockMeta)(nil), // 1: metastore.v1.BlockMeta (*Dataset)(nil), // 2: metastore.v1.Dataset - (*v1.Labels)(nil), // 3: types.v1.Labels } var file_metastore_v1_types_proto_depIdxs = []int32{ 2, // 0: metastore.v1.BlockMeta.datasets:type_name -> metastore.v1.Dataset - 3, // 1: metastore.v1.Dataset.labels:type_name -> types.v1.Labels - 2, // [2:2] is the sub-list for method output_type - 2, // [2:2] is the sub-list for method input_type - 2, // [2:2] is the sub-list for extension type_name - 2, // [2:2] is the sub-list for extension extendee - 0, // [0:2] is the sub-list for field type_name + 1, // [1:1] is the sub-list for method output_type + 1, // [1:1] is the sub-list for method input_type + 1, // [1:1] is the sub-list for extension type_name + 1, // [1:1] is the sub-list for extension extendee + 0, // [0:1] is the sub-list for field type_name } func init() { file_metastore_v1_types_proto_init() } diff --git a/api/gen/proto/go/metastore/v1/types_vtproto.pb.go b/api/gen/proto/go/metastore/v1/types_vtproto.pb.go index 8a47bc41f8..349ea3dd4f 100644 --- a/api/gen/proto/go/metastore/v1/types_vtproto.pb.go +++ b/api/gen/proto/go/metastore/v1/types_vtproto.pb.go @@ -6,7 +6,6 @@ package metastorev1 import ( fmt "fmt" - v1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" protohelpers "github.com/planetscale/vtprotobuf/protohelpers" proto "google.golang.org/protobuf/proto" protoimpl "google.golang.org/protobuf/runtime/protoimpl" @@ -50,13 +49,13 @@ func (m *BlockMeta) CloneVT() *BlockMeta { r := new(BlockMeta) r.FormatVersion = m.FormatVersion r.Id = m.Id - r.MinTime = m.MinTime - r.MaxTime = m.MaxTime + r.Tenant = m.Tenant r.Shard = m.Shard r.CompactionLevel = m.CompactionLevel - r.TenantId = m.TenantId - r.Size = m.Size + r.MinTime = m.MinTime + r.MaxTime = m.MaxTime r.CreatedBy = m.CreatedBy + r.Size = m.Size if rhs := m.Datasets; rhs != nil { tmpContainer := make([]*Dataset, len(rhs)) for k, v := range rhs { @@ -64,6 +63,11 @@ func (m *BlockMeta) CloneVT() *BlockMeta { } r.Datasets = tmpContainer } + if rhs := m.StringTable; rhs != nil { + tmpContainer := 
make([]string, len(rhs)) + copy(tmpContainer, rhs) + r.StringTable = tmpContainer + } if len(m.unknownFields) > 0 { r.unknownFields = make([]byte, len(m.unknownFields)) copy(r.unknownFields, m.unknownFields) @@ -80,7 +84,7 @@ func (m *Dataset) CloneVT() *Dataset { return (*Dataset)(nil) } r := new(Dataset) - r.TenantId = m.TenantId + r.Tenant = m.Tenant r.Name = m.Name r.MinTime = m.MinTime r.MaxTime = m.MaxTime @@ -91,19 +95,13 @@ func (m *Dataset) CloneVT() *Dataset { r.TableOfContents = tmpContainer } if rhs := m.ProfileTypes; rhs != nil { - tmpContainer := make([]string, len(rhs)) + tmpContainer := make([]int32, len(rhs)) copy(tmpContainer, rhs) r.ProfileTypes = tmpContainer } if rhs := m.Labels; rhs != nil { - tmpContainer := make([]*v1.Labels, len(rhs)) - for k, v := range rhs { - if vtpb, ok := interface{}(v).(interface{ CloneVT() *v1.Labels }); ok { - tmpContainer[k] = vtpb.CloneVT() - } else { - tmpContainer[k] = proto.Clone(v).(*v1.Labels) - } - } + tmpContainer := make([]int32, len(rhs)) + copy(tmpContainer, rhs) r.Labels = tmpContainer } if len(m.unknownFields) > 0 { @@ -160,10 +158,7 @@ func (this *BlockMeta) EqualVT(that *BlockMeta) bool { if this.Id != that.Id { return false } - if this.MinTime != that.MinTime { - return false - } - if this.MaxTime != that.MaxTime { + if this.Tenant != that.Tenant { return false } if this.Shard != that.Shard { @@ -172,7 +167,16 @@ func (this *BlockMeta) EqualVT(that *BlockMeta) bool { if this.CompactionLevel != that.CompactionLevel { return false } - if this.TenantId != that.TenantId { + if this.MinTime != that.MinTime { + return false + } + if this.MaxTime != that.MaxTime { + return false + } + if this.CreatedBy != that.CreatedBy { + return false + } + if this.Size != that.Size { return false } if len(this.Datasets) != len(that.Datasets) { @@ -192,11 +196,14 @@ func (this *BlockMeta) EqualVT(that *BlockMeta) bool { } } } - if this.Size != that.Size { + if len(this.StringTable) != len(that.StringTable) { return false } - if this.CreatedBy != that.CreatedBy { - return false + for i, vx := range this.StringTable { + vy := that.StringTable[i] + if vx != vy { + return false + } } return string(this.unknownFields) == string(that.unknownFields) } @@ -214,7 +221,7 @@ func (this *Dataset) EqualVT(that *Dataset) bool { } else if this == nil || that == nil { return false } - if this.TenantId != that.TenantId { + if this.Tenant != that.Tenant { return false } if this.Name != that.Name { @@ -252,20 +259,8 @@ func (this *Dataset) EqualVT(that *Dataset) bool { } for i, vx := range this.Labels { vy := that.Labels[i] - if p, q := vx, vy; p != q { - if p == nil { - p = &v1.Labels{} - } - if q == nil { - q = &v1.Labels{} - } - if equal, ok := interface{}(p).(interface{ EqualVT(*v1.Labels) bool }); ok { - if !equal.EqualVT(q) { - return false - } - } else if !proto.Equal(p, q) { - return false - } + if vx != vy { + return false } } return string(this.unknownFields) == string(that.unknownFields) @@ -362,17 +357,14 @@ func (m *BlockMeta) MarshalToSizedBufferVT(dAtA []byte) (int, error) { i -= len(m.unknownFields) copy(dAtA[i:], m.unknownFields) } - if len(m.CreatedBy) > 0 { - i -= len(m.CreatedBy) - copy(dAtA[i:], m.CreatedBy) - i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.CreatedBy))) - i-- - dAtA[i] = 0x52 - } - if m.Size != 0 { - i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Size)) - i-- - dAtA[i] = 0x48 + if len(m.StringTable) > 0 { + for iNdEx := len(m.StringTable) - 1; iNdEx >= 0; iNdEx-- { + i -= len(m.StringTable[iNdEx]) + copy(dAtA[i:], 
m.StringTable[iNdEx]) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.StringTable[iNdEx]))) + i-- + dAtA[i] = 0x5a + } } if len(m.Datasets) > 0 { for iNdEx := len(m.Datasets) - 1; iNdEx >= 0; iNdEx-- { @@ -383,33 +375,41 @@ func (m *BlockMeta) MarshalToSizedBufferVT(dAtA []byte) (int, error) { i -= size i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) i-- - dAtA[i] = 0x42 + dAtA[i] = 0x52 } } - if len(m.TenantId) > 0 { - i -= len(m.TenantId) - copy(dAtA[i:], m.TenantId) - i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.TenantId))) + if m.Size != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Size)) i-- - dAtA[i] = 0x3a + dAtA[i] = 0x48 } - if m.CompactionLevel != 0 { - i = protohelpers.EncodeVarint(dAtA, i, uint64(m.CompactionLevel)) + if m.CreatedBy != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.CreatedBy)) + i-- + dAtA[i] = 0x40 + } + if m.MaxTime != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.MaxTime)) + i-- + dAtA[i] = 0x38 + } + if m.MinTime != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.MinTime)) i-- dAtA[i] = 0x30 } - if m.Shard != 0 { - i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Shard)) + if m.CompactionLevel != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.CompactionLevel)) i-- dAtA[i] = 0x28 } - if m.MaxTime != 0 { - i = protohelpers.EncodeVarint(dAtA, i, uint64(m.MaxTime)) + if m.Shard != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Shard)) i-- dAtA[i] = 0x20 } - if m.MinTime != 0 { - i = protohelpers.EncodeVarint(dAtA, i, uint64(m.MinTime)) + if m.Tenant != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Tenant)) i-- dAtA[i] = 0x18 } @@ -459,37 +459,46 @@ func (m *Dataset) MarshalToSizedBufferVT(dAtA []byte) (int, error) { copy(dAtA[i:], m.unknownFields) } if len(m.Labels) > 0 { - for iNdEx := len(m.Labels) - 1; iNdEx >= 0; iNdEx-- { - if vtmsg, ok := interface{}(m.Labels[iNdEx]).(interface { - MarshalToSizedBufferVT([]byte) (int, error) - }); ok { - size, err := vtmsg.MarshalToSizedBufferVT(dAtA[:i]) - if err != nil { - return 0, err - } - i -= size - i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) - } else { - encoded, err := proto.Marshal(m.Labels[iNdEx]) - if err != nil { - return 0, err - } - i -= len(encoded) - copy(dAtA[i:], encoded) - i = protohelpers.EncodeVarint(dAtA, i, uint64(len(encoded))) + var pksize2 int + for _, num := range m.Labels { + pksize2 += protohelpers.SizeOfVarint(uint64(num)) + } + i -= pksize2 + j1 := i + for _, num1 := range m.Labels { + num := uint64(num1) + for num >= 1<<7 { + dAtA[j1] = uint8(uint64(num)&0x7f | 0x80) + num >>= 7 + j1++ } - i-- - dAtA[i] = 0x42 + dAtA[j1] = uint8(num) + j1++ } + i = protohelpers.EncodeVarint(dAtA, i, uint64(pksize2)) + i-- + dAtA[i] = 0x42 } if len(m.ProfileTypes) > 0 { - for iNdEx := len(m.ProfileTypes) - 1; iNdEx >= 0; iNdEx-- { - i -= len(m.ProfileTypes[iNdEx]) - copy(dAtA[i:], m.ProfileTypes[iNdEx]) - i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.ProfileTypes[iNdEx]))) - i-- - dAtA[i] = 0x3a + var pksize4 int + for _, num := range m.ProfileTypes { + pksize4 += protohelpers.SizeOfVarint(uint64(num)) + } + i -= pksize4 + j3 := i + for _, num1 := range m.ProfileTypes { + num := uint64(num1) + for num >= 1<<7 { + dAtA[j3] = uint8(uint64(num)&0x7f | 0x80) + num >>= 7 + j3++ + } + dAtA[j3] = uint8(num) + j3++ } + i = protohelpers.EncodeVarint(dAtA, i, uint64(pksize4)) + i-- + dAtA[i] = 0x3a } if m.Size != 0 { i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Size)) @@ -497,22 +506,22 @@ func (m *Dataset) 
MarshalToSizedBufferVT(dAtA []byte) (int, error) { dAtA[i] = 0x30 } if len(m.TableOfContents) > 0 { - var pksize2 int + var pksize6 int for _, num := range m.TableOfContents { - pksize2 += protohelpers.SizeOfVarint(uint64(num)) + pksize6 += protohelpers.SizeOfVarint(uint64(num)) } - i -= pksize2 - j1 := i + i -= pksize6 + j5 := i for _, num := range m.TableOfContents { for num >= 1<<7 { - dAtA[j1] = uint8(uint64(num)&0x7f | 0x80) + dAtA[j5] = uint8(uint64(num)&0x7f | 0x80) num >>= 7 - j1++ + j5++ } - dAtA[j1] = uint8(num) - j1++ + dAtA[j5] = uint8(num) + j5++ } - i = protohelpers.EncodeVarint(dAtA, i, uint64(pksize2)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(pksize6)) i-- dAtA[i] = 0x2a } @@ -526,19 +535,15 @@ func (m *Dataset) MarshalToSizedBufferVT(dAtA []byte) (int, error) { i-- dAtA[i] = 0x18 } - if len(m.Name) > 0 { - i -= len(m.Name) - copy(dAtA[i:], m.Name) - i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.Name))) + if m.Name != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Name)) i-- - dAtA[i] = 0x12 + dAtA[i] = 0x10 } - if len(m.TenantId) > 0 { - i -= len(m.TenantId) - copy(dAtA[i:], m.TenantId) - i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.TenantId))) + if m.Tenant != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Tenant)) i-- - dAtA[i] = 0xa + dAtA[i] = 0x8 } return len(dAtA) - i, nil } @@ -579,11 +584,8 @@ func (m *BlockMeta) SizeVT() (n int) { if l > 0 { n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } - if m.MinTime != 0 { - n += 1 + protohelpers.SizeOfVarint(uint64(m.MinTime)) - } - if m.MaxTime != 0 { - n += 1 + protohelpers.SizeOfVarint(uint64(m.MaxTime)) + if m.Tenant != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.Tenant)) } if m.Shard != 0 { n += 1 + protohelpers.SizeOfVarint(uint64(m.Shard)) @@ -591,9 +593,17 @@ func (m *BlockMeta) SizeVT() (n int) { if m.CompactionLevel != 0 { n += 1 + protohelpers.SizeOfVarint(uint64(m.CompactionLevel)) } - l = len(m.TenantId) - if l > 0 { - n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + if m.MinTime != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.MinTime)) + } + if m.MaxTime != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.MaxTime)) + } + if m.CreatedBy != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.CreatedBy)) + } + if m.Size != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.Size)) } if len(m.Datasets) > 0 { for _, e := range m.Datasets { @@ -601,12 +611,11 @@ func (m *BlockMeta) SizeVT() (n int) { n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } } - if m.Size != 0 { - n += 1 + protohelpers.SizeOfVarint(uint64(m.Size)) - } - l = len(m.CreatedBy) - if l > 0 { - n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + if len(m.StringTable) > 0 { + for _, s := range m.StringTable { + l = len(s) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } } n += len(m.unknownFields) return n @@ -618,13 +627,11 @@ func (m *Dataset) SizeVT() (n int) { } var l int _ = l - l = len(m.TenantId) - if l > 0 { - n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + if m.Tenant != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.Tenant)) } - l = len(m.Name) - if l > 0 { - n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + if m.Name != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.Name)) } if m.MinTime != 0 { n += 1 + protohelpers.SizeOfVarint(uint64(m.MinTime)) @@ -643,22 +650,18 @@ func (m *Dataset) SizeVT() (n int) { n += 1 + protohelpers.SizeOfVarint(uint64(m.Size)) } if len(m.ProfileTypes) > 0 { - for _, s := range m.ProfileTypes { - l = len(s) - n += 1 + l + 
protohelpers.SizeOfVarint(uint64(l)) + l = 0 + for _, e := range m.ProfileTypes { + l += protohelpers.SizeOfVarint(uint64(e)) } + n += 1 + protohelpers.SizeOfVarint(uint64(l)) + l } if len(m.Labels) > 0 { + l = 0 for _, e := range m.Labels { - if size, ok := interface{}(e).(interface { - SizeVT() int - }); ok { - l = size.SizeVT() - } else { - l = proto.Size(e) - } - n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + l += protohelpers.SizeOfVarint(uint64(e)) } + n += 1 + protohelpers.SizeOfVarint(uint64(l)) + l } n += len(m.unknownFields) return n @@ -841,7 +844,7 @@ func (m *BlockMeta) UnmarshalVT(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - m.FormatVersion |= uint64(b&0x7F) << shift + m.FormatVersion |= uint32(b&0x7F) << shift if b < 0x80 { break } @@ -880,9 +883,9 @@ func (m *BlockMeta) UnmarshalVT(dAtA []byte) error { iNdEx = postIndex case 3: if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field MinTime", wireType) + return fmt.Errorf("proto: wrong wireType = %d for field Tenant", wireType) } - m.MinTime = 0 + m.Tenant = 0 for shift := uint(0); ; shift += 7 { if shift >= 64 { return protohelpers.ErrIntOverflow @@ -892,16 +895,16 @@ func (m *BlockMeta) UnmarshalVT(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - m.MinTime |= int64(b&0x7F) << shift + m.Tenant |= int32(b&0x7F) << shift if b < 0x80 { break } } case 4: if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field MaxTime", wireType) + return fmt.Errorf("proto: wrong wireType = %d for field Shard", wireType) } - m.MaxTime = 0 + m.Shard = 0 for shift := uint(0); ; shift += 7 { if shift >= 64 { return protohelpers.ErrIntOverflow @@ -911,16 +914,16 @@ func (m *BlockMeta) UnmarshalVT(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - m.MaxTime |= int64(b&0x7F) << shift + m.Shard |= uint32(b&0x7F) << shift if b < 0x80 { break } } case 5: if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Shard", wireType) + return fmt.Errorf("proto: wrong wireType = %d for field CompactionLevel", wireType) } - m.Shard = 0 + m.CompactionLevel = 0 for shift := uint(0); ; shift += 7 { if shift >= 64 { return protohelpers.ErrIntOverflow @@ -930,16 +933,16 @@ func (m *BlockMeta) UnmarshalVT(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - m.Shard |= uint32(b&0x7F) << shift + m.CompactionLevel |= uint32(b&0x7F) << shift if b < 0x80 { break } } case 6: if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field CompactionLevel", wireType) + return fmt.Errorf("proto: wrong wireType = %d for field MinTime", wireType) } - m.CompactionLevel = 0 + m.MinTime = 0 for shift := uint(0); ; shift += 7 { if shift >= 64 { return protohelpers.ErrIntOverflow @@ -949,16 +952,16 @@ func (m *BlockMeta) UnmarshalVT(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - m.CompactionLevel |= uint32(b&0x7F) << shift + m.MinTime |= int64(b&0x7F) << shift if b < 0x80 { break } } case 7: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field TenantId", wireType) + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field MaxTime", wireType) } - var stringLen uint64 + m.MaxTime = 0 for shift := uint(0); ; shift += 7 { if shift >= 64 { return protohelpers.ErrIntOverflow @@ -968,25 +971,50 @@ func (m *BlockMeta) UnmarshalVT(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - stringLen |= uint64(b&0x7F) << shift + m.MaxTime |= int64(b&0x7F) << shift if b < 0x80 { break } } - intStringLen := int(stringLen) - if intStringLen < 0 { - return 
protohelpers.ErrInvalidLength + case 8: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field CreatedBy", wireType) } - postIndex := iNdEx + intStringLen - if postIndex < 0 { - return protohelpers.ErrInvalidLength + m.CreatedBy = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.CreatedBy |= int32(b&0x7F) << shift + if b < 0x80 { + break + } } - if postIndex > l { - return io.ErrUnexpectedEOF + case 9: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Size", wireType) } - m.TenantId = string(dAtA[iNdEx:postIndex]) - iNdEx = postIndex - case 8: + m.Size = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Size |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 10: if wireType != 2 { return fmt.Errorf("proto: wrong wireType = %d for field Datasets", wireType) } @@ -1020,28 +1048,9 @@ func (m *BlockMeta) UnmarshalVT(dAtA []byte) error { return err } iNdEx = postIndex - case 9: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Size", wireType) - } - m.Size = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return protohelpers.ErrIntOverflow - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.Size |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 10: + case 11: if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field CreatedBy", wireType) + return fmt.Errorf("proto: wrong wireType = %d for field StringTable", wireType) } var stringLen uint64 for shift := uint(0); ; shift += 7 { @@ -1069,7 +1078,7 @@ func (m *BlockMeta) UnmarshalVT(dAtA []byte) error { if postIndex > l { return io.ErrUnexpectedEOF } - m.CreatedBy = string(dAtA[iNdEx:postIndex]) + m.StringTable = append(m.StringTable, string(dAtA[iNdEx:postIndex])) iNdEx = postIndex default: iNdEx = preIndex @@ -1123,10 +1132,10 @@ func (m *Dataset) UnmarshalVT(dAtA []byte) error { } switch fieldNum { case 1: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field TenantId", wireType) + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Tenant", wireType) } - var stringLen uint64 + m.Tenant = 0 for shift := uint(0); ; shift += 7 { if shift >= 64 { return protohelpers.ErrIntOverflow @@ -1136,29 +1145,16 @@ func (m *Dataset) UnmarshalVT(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - stringLen |= uint64(b&0x7F) << shift + m.Tenant |= int32(b&0x7F) << shift if b < 0x80 { break } } - intStringLen := int(stringLen) - if intStringLen < 0 { - return protohelpers.ErrInvalidLength - } - postIndex := iNdEx + intStringLen - if postIndex < 0 { - return protohelpers.ErrInvalidLength - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - m.TenantId = string(dAtA[iNdEx:postIndex]) - iNdEx = postIndex case 2: - if wireType != 2 { + if wireType != 0 { return fmt.Errorf("proto: wrong wireType = %d for field Name", wireType) } - var stringLen uint64 + m.Name = 0 for shift := uint(0); ; shift += 7 { if shift >= 64 { return protohelpers.ErrIntOverflow @@ -1168,24 +1164,11 @@ func (m *Dataset) UnmarshalVT(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - stringLen |= uint64(b&0x7F) << shift + m.Name |= int32(b&0x7F) << shift if b < 0x80 { break } } - intStringLen := 
int(stringLen) - if intStringLen < 0 { - return protohelpers.ErrInvalidLength - } - postIndex := iNdEx + intStringLen - if postIndex < 0 { - return protohelpers.ErrInvalidLength - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - m.Name = string(dAtA[iNdEx:postIndex]) - iNdEx = postIndex case 3: if wireType != 0 { return fmt.Errorf("proto: wrong wireType = %d for field MinTime", wireType) @@ -1320,79 +1303,157 @@ func (m *Dataset) UnmarshalVT(dAtA []byte) error { } } case 7: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field ProfileTypes", wireType) - } - var stringLen uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return protohelpers.ErrIntOverflow + if wireType == 0 { + var v int32 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= int32(b&0x7F) << shift + if b < 0x80 { + break + } } - if iNdEx >= l { + m.ProfileTypes = append(m.ProfileTypes, v) + } else if wireType == 2 { + var packedLen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + packedLen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if packedLen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + packedLen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { return io.ErrUnexpectedEOF } - b := dAtA[iNdEx] - iNdEx++ - stringLen |= uint64(b&0x7F) << shift - if b < 0x80 { - break + var elementCount int + var count int + for _, integer := range dAtA[iNdEx:postIndex] { + if integer < 128 { + count++ + } } + elementCount = count + if elementCount != 0 && len(m.ProfileTypes) == 0 { + m.ProfileTypes = make([]int32, 0, elementCount) + } + for iNdEx < postIndex { + var v int32 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= int32(b&0x7F) << shift + if b < 0x80 { + break + } + } + m.ProfileTypes = append(m.ProfileTypes, v) + } + } else { + return fmt.Errorf("proto: wrong wireType = %d for field ProfileTypes", wireType) } - intStringLen := int(stringLen) - if intStringLen < 0 { - return protohelpers.ErrInvalidLength - } - postIndex := iNdEx + intStringLen - if postIndex < 0 { - return protohelpers.ErrInvalidLength - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - m.ProfileTypes = append(m.ProfileTypes, string(dAtA[iNdEx:postIndex])) - iNdEx = postIndex case 8: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field Labels", wireType) - } - var msglen int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return protohelpers.ErrIntOverflow + if wireType == 0 { + var v int32 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= int32(b&0x7F) << shift + if b < 0x80 { + break + } } - if iNdEx >= l { + m.Labels = append(m.Labels, v) + } else if wireType == 2 { + var packedLen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + packedLen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if packedLen < 0 { + return 
protohelpers.ErrInvalidLength + } + postIndex := iNdEx + packedLen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { return io.ErrUnexpectedEOF } - b := dAtA[iNdEx] - iNdEx++ - msglen |= int(b&0x7F) << shift - if b < 0x80 { - break + var elementCount int + var count int + for _, integer := range dAtA[iNdEx:postIndex] { + if integer < 128 { + count++ + } } - } - if msglen < 0 { - return protohelpers.ErrInvalidLength - } - postIndex := iNdEx + msglen - if postIndex < 0 { - return protohelpers.ErrInvalidLength - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - m.Labels = append(m.Labels, &v1.Labels{}) - if unmarshal, ok := interface{}(m.Labels[len(m.Labels)-1]).(interface { - UnmarshalVT([]byte) error - }); ok { - if err := unmarshal.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { - return err + elementCount = count + if elementCount != 0 && len(m.Labels) == 0 { + m.Labels = make([]int32, 0, elementCount) } - } else { - if err := proto.Unmarshal(dAtA[iNdEx:postIndex], m.Labels[len(m.Labels)-1]); err != nil { - return err + for iNdEx < postIndex { + var v int32 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= int32(b&0x7F) << shift + if b < 0x80 { + break + } + } + m.Labels = append(m.Labels, v) } + } else { + return fmt.Errorf("proto: wrong wireType = %d for field Labels", wireType) } - iNdEx = postIndex default: iNdEx = preIndex skippy, err := protohelpers.Skip(dAtA[iNdEx:]) diff --git a/api/metastore/v1/types.proto b/api/metastore/v1/types.proto index 8ce57d1b4e..18ac943dba 100644 --- a/api/metastore/v1/types.proto +++ b/api/metastore/v1/types.proto @@ -2,8 +2,6 @@ syntax = "proto3"; package metastore.v1; -import "types/v1/types.proto"; - message BlockList { string tenant = 1; uint32 shard = 2; @@ -11,22 +9,28 @@ message BlockList { } message BlockMeta { - uint64 format_version = 1; + uint32 format_version = 1; + // Block ID is a unique identifier for the block. + // This is the only field that is not included into + // the string table. string id = 2; - int64 min_time = 3; - int64 max_time = 4; - uint32 shard = 5; - uint32 compaction_level = 6; // Optional. Empty if compaction level is 0. - string tenant_id = 7; - repeated Dataset datasets = 8; + int32 tenant = 3; + uint32 shard = 4; + uint32 compaction_level = 5; + int64 min_time = 6; + int64 max_time = 7; + int32 created_by = 8; uint64 size = 9; - string created_by = 10; + repeated Dataset datasets = 10; + // String table contains strings of the block. + // By convention, the first string is always an empty string. + repeated string string_table = 11; } message Dataset { - string tenant_id = 1; - string name = 2; + int32 tenant = 1; + int32 name = 2; int64 min_time = 3; int64 max_time = 4; @@ -44,6 +48,7 @@ message Dataset { // TODO: Delete. Use labels instead. // Profile types present in the tenant service data. - repeated string profile_types = 7; - repeated types.v1.Labels labels = 8; + repeated int32 profile_types = 7; + // Length prefixed label key-value pairs. 
+ repeated int32 labels = 8; } diff --git a/api/openapiv2/gen/phlare.swagger.json b/api/openapiv2/gen/phlare.swagger.json index 80d52beb6d..dc2d8ae1dd 100644 --- a/api/openapiv2/gen/phlare.swagger.json +++ b/api/openapiv2/gen/phlare.swagger.json @@ -602,19 +602,17 @@ "type": "object", "properties": { "formatVersion": { - "type": "string", - "format": "uint64" + "type": "integer", + "format": "int64" }, "id": { - "type": "string" - }, - "minTime": { "type": "string", - "format": "int64" + "description": "Block ID is a unique identifier for the block.\nThis is the only field that is not included into\nthe string table." }, - "maxTime": { - "type": "string", - "format": "int64" + "tenant": { + "type": "integer", + "format": "int32", + "description": "Optional. Empty if compaction level is 0." }, "shard": { "type": "integer", @@ -624,9 +622,21 @@ "type": "integer", "format": "int64" }, - "tenantId": { + "minTime": { "type": "string", - "description": "Optional. Empty if compaction level is 0." + "format": "int64" + }, + "maxTime": { + "type": "string", + "format": "int64" + }, + "createdBy": { + "type": "integer", + "format": "int32" + }, + "size": { + "type": "string", + "format": "uint64" }, "datasets": { "type": "array", @@ -635,12 +645,12 @@ "$ref": "#/definitions/v1Dataset" } }, - "size": { - "type": "string", - "format": "uint64" - }, - "createdBy": { - "type": "string" + "stringTable": { + "type": "array", + "items": { + "type": "string" + }, + "description": "String table contains strings of the block.\nBy convention, the first string is always an empty string." } } }, @@ -824,11 +834,13 @@ "v1Dataset": { "type": "object", "properties": { - "tenantId": { - "type": "string" + "tenant": { + "type": "integer", + "format": "int32" }, "name": { - "type": "string" + "type": "integer", + "format": "int32" }, "minTime": { "type": "string", @@ -854,16 +866,18 @@ "profileTypes": { "type": "array", "items": { - "type": "string" + "type": "integer", + "format": "int32" }, "description": "TODO: Delete. Use labels instead.\nProfile types present in the tenant service data." }, "labels": { "type": "array", "items": { - "type": "object", - "$ref": "#/definitions/v1Labels" - } + "type": "integer", + "format": "int32" + }, + "description": "Length prefixed label key-value pairs." 
} } }, diff --git a/go.mod b/go.mod index af914f1d1f..1b2cb5920a 100644 --- a/go.mod +++ b/go.mod @@ -75,7 +75,7 @@ require ( github.com/uber/jaeger-client-go v2.30.0+incompatible github.com/valyala/bytebufferpool v1.0.0 github.com/xlab/treeprint v1.2.0 - go.etcd.io/bbolt v1.3.10 + go.etcd.io/bbolt v1.3.11 go.opentelemetry.io/otel v1.30.0 go.opentelemetry.io/proto/otlp v1.1.0 go.uber.org/atomic v1.11.0 diff --git a/go.sum b/go.sum index e1723b5845..f5630aea47 100644 --- a/go.sum +++ b/go.sum @@ -760,8 +760,8 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -go.etcd.io/bbolt v1.3.10 h1:+BqfJTcCzTItrop8mq/lbzL8wSGtj94UO/3U31shqG0= -go.etcd.io/bbolt v1.3.10/go.mod h1:bK3UQLPJZly7IlNmV7uVHJDxfe5aK9Ll93e/74Y9oEQ= +go.etcd.io/bbolt v1.3.11 h1:yGEzV1wPz2yVCLsD8ZAiGHhHVlczyC9d1rP43/VCRJ0= +go.etcd.io/bbolt v1.3.11/go.mod h1:dksAq7YMXoljX0xu6VF5DMZGbhYYoLUalEiSySYAS4I= go.etcd.io/etcd/api/v3 v3.5.7 h1:sbcmosSVesNrWOJ58ZQFitHMdncusIifYcrBfwrlJSY= go.etcd.io/etcd/api/v3 v3.5.7/go.mod h1:9qew1gCdDDLu+VwmeG+iFpL+QlpHTo7iubavdVDgCAA= go.etcd.io/etcd/client/pkg/v3 v3.5.7 h1:y3kf5Gbp4e4q7egZdn5T7W9TSHUvkClN6u+Rq9mEOmg= diff --git a/pkg/experiment/query_backend/block/compaction.go b/pkg/experiment/block/compaction.go similarity index 86% rename from pkg/experiment/query_backend/block/compaction.go rename to pkg/experiment/block/compaction.go index 7289826b11..a7b535f06e 100644 --- a/pkg/experiment/query_backend/block/compaction.go +++ b/pkg/experiment/block/compaction.go @@ -2,7 +2,6 @@ package block import ( "context" - "crypto/rand" "fmt" "os" "path/filepath" @@ -12,7 +11,6 @@ import ( "sync" "github.com/grafana/dskit/multierror" - "github.com/oklog/ulid" "github.com/parquet-go/parquet-go" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/storage" @@ -115,18 +113,17 @@ func PlanCompaction(objects Objects) ([]*CompactionPlan, error) { } level++ - // Assuming that the first block in the job is the oldest one. - timestamp := ulid.MustParse(r.meta.Id).Time() + g := NewULIDGenerator(objects) m := make(map[string]*CompactionPlan) for _, obj := range objects { for _, s := range obj.meta.Datasets { - tm, ok := m[s.TenantId] + tm, ok := m[obj.meta.StringTable[s.Tenant]] if !ok { - tm = newBlockCompaction(timestamp, s.TenantId, r.meta.Shard, level) - m[s.TenantId] = tm + tm = newBlockCompaction(g.ULID().String(), obj.meta.StringTable[s.Tenant], r.meta.Shard, level) + m[obj.meta.StringTable[s.Tenant]] = tm } - sm := tm.addDataset(s) // Bind objects to datasets. 
+ sm := tm.addDataset(obj.meta, s) sm.append(NewDataset(s, obj)) } } @@ -135,44 +132,49 @@ func PlanCompaction(objects Objects) ([]*CompactionPlan, error) { for _, tm := range m { ordered = append(ordered, tm) slices.SortFunc(tm.datasets, func(a, b *datasetCompaction) int { - return strings.Compare(a.meta.Name, b.meta.Name) + return strings.Compare(a.name, b.name) }) } slices.SortFunc(ordered, func(a, b *CompactionPlan) int { - return strings.Compare(a.tenantID, b.tenantID) + return strings.Compare(a.tenant, b.tenant) }) return ordered, nil } type CompactionPlan struct { - tenantID string - datasetMap map[string]*datasetCompaction + tenant string + path string + datasetMap map[int32]*datasetCompaction datasets []*datasetCompaction meta *metastorev1.BlockMeta + strings *MetadataStrings } -func newBlockCompaction(unixMilli uint64, tenantID string, shard uint32, compactionLevel uint32) *CompactionPlan { - return &CompactionPlan{ - tenantID: tenantID, - datasetMap: make(map[string]*datasetCompaction), - meta: &metastorev1.BlockMeta{ - FormatVersion: 1, - // TODO(kolesnikovae): Make it deterministic? - Id: ulid.MustNew(unixMilli, rand.Reader).String(), - TenantId: tenantID, - Shard: shard, - CompactionLevel: compactionLevel, - Datasets: nil, - MinTime: 0, - MaxTime: 0, - Size: 0, - }, - } +func newBlockCompaction( + id string, + tenant string, + shard uint32, + compactionLevel uint32, +) *CompactionPlan { + p := &CompactionPlan{ + tenant: tenant, + datasetMap: make(map[int32]*datasetCompaction), + strings: NewMetadataStringTable(), + } + p.path = BuildObjectPath(tenant, shard, compactionLevel, id) + p.meta = &metastorev1.BlockMeta{ + FormatVersion: 1, + Id: id, + Tenant: p.strings.Put(tenant), + Shard: shard, + CompactionLevel: compactionLevel, + } + return p } func (b *CompactionPlan) Compact(ctx context.Context, dst objstore.Bucket, tmpdir string) (m *metastorev1.BlockMeta, err error) { - w := NewBlockWriter(dst, ObjectPath(b.meta), tmpdir) + w := NewBlockWriter(dst, b.path, tmpdir) defer func() { err = multierror.New(err, w.Close()).Err() }() @@ -187,14 +189,17 @@ func (b *CompactionPlan) Compact(ctx context.Context, dst objstore.Bucket, tmpdi return nil, fmt.Errorf("flushing block writer: %w", err) } b.meta.Size = w.Offset() + b.meta.StringTable = b.strings.Strings return b.meta, nil } -func (b *CompactionPlan) addDataset(s *metastorev1.Dataset) *datasetCompaction { - sm, ok := b.datasetMap[s.Name] +func (b *CompactionPlan) addDataset(md *metastorev1.BlockMeta, s *metastorev1.Dataset) *datasetCompaction { + name := b.strings.Put(md.StringTable[s.Name]) + tenant := b.strings.Put(md.StringTable[s.Tenant]) + sm, ok := b.datasetMap[name] if !ok { - sm = newDatasetCompaction(s.TenantId, s.Name) - b.datasetMap[s.Name] = sm + sm = b.newDatasetCompaction(tenant, name) + b.datasetMap[name] = sm b.datasets = append(b.datasets, sm) } if b.meta.MinTime == 0 || s.MinTime < b.meta.MinTime { @@ -207,8 +212,12 @@ func (b *CompactionPlan) addDataset(s *metastorev1.Dataset) *datasetCompaction { } type datasetCompaction struct { + // Dataset name. + name string + parent *CompactionPlan + meta *metastorev1.Dataset - ptypes map[string]struct{} + ptypes map[int32]struct{} path string // Set at open. 
datasets []*Dataset @@ -224,12 +233,14 @@ type datasetCompaction struct { flushOnce sync.Once } -func newDatasetCompaction(tenantID, name string) *datasetCompaction { +func (b *CompactionPlan) newDatasetCompaction(tenant, name int32) *datasetCompaction { return &datasetCompaction{ - ptypes: make(map[string]struct{}, 10), + name: b.strings.Strings[name], + parent: b, + ptypes: make(map[int32]struct{}, 10), meta: &metastorev1.Dataset{ - TenantId: tenantID, - Name: name, + Tenant: tenant, + Name: name, // Updated at append. MinTime: 0, MaxTime: 0, @@ -250,7 +261,8 @@ func (m *datasetCompaction) append(s *Dataset) { m.meta.MaxTime = s.meta.MaxTime } for _, pt := range s.meta.ProfileTypes { - m.ptypes[pt] = struct{}{} + ptn := m.parent.strings.Put(s.obj.meta.StringTable[pt]) + m.ptypes[ptn] = struct{}{} } } @@ -388,11 +400,10 @@ func (m *datasetCompaction) writeTo(w *Writer) (err error) { return err } m.meta.Size = w.Offset() - off - m.meta.ProfileTypes = make([]string, 0, len(m.ptypes)) + m.meta.ProfileTypes = make([]int32, 0, len(m.ptypes)) for pt := range m.ptypes { m.meta.ProfileTypes = append(m.meta.ProfileTypes, pt) } - sort.Strings(m.meta.ProfileTypes) return nil } diff --git a/pkg/experiment/query_backend/block/compaction_test.go b/pkg/experiment/block/compaction_test.go similarity index 76% rename from pkg/experiment/query_backend/block/compaction_test.go rename to pkg/experiment/block/compaction_test.go index e2c9ac8c32..0176077e40 100644 --- a/pkg/experiment/query_backend/block/compaction_test.go +++ b/pkg/experiment/block/compaction_test.go @@ -2,10 +2,12 @@ package block import ( "context" + "encoding/json" "os" "path/filepath" "testing" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "google.golang.org/protobuf/encoding/protojson" @@ -33,6 +35,9 @@ func Test_CompactBlocks(t *testing.T) { ) require.NoError(t, err) - require.Len(t, compactedBlocks, 1) - // TODO: Assertions. 
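+	// Compare the JSON-encoded compaction result against the golden file.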
+ compactedJson, err := json.MarshalIndent(compactedBlocks, "", " ") + require.NoError(t, err) + expectedJson, err := os.ReadFile("testdata/compacted.golden") + require.NoError(t, err) + assert.Equal(t, string(expectedJson), string(compactedJson)) } diff --git a/pkg/experiment/query_backend/block/constants.go b/pkg/experiment/block/constants.go similarity index 92% rename from pkg/experiment/query_backend/block/constants.go rename to pkg/experiment/block/constants.go index 92b390ce37..23c622b0e7 100644 --- a/pkg/experiment/query_backend/block/constants.go +++ b/pkg/experiment/block/constants.go @@ -5,12 +5,14 @@ import ( ) const ( - DirPathSegment = "segments/" - DirPathBlock = "blocks/" + DirNameSegment = "segments" + DirNameBlock = "blocks" + DirNameDLQ = "dlq" DirNameAnonTenant = tenant.DefaultTenantID FileNameProfilesParquet = "profiles.parquet" FileNameDataObject = "block.bin" + FileNameMetadataObject = "meta.pb" ) const ( diff --git a/pkg/experiment/query_backend/block/dataset.go b/pkg/experiment/block/dataset.go similarity index 100% rename from pkg/experiment/query_backend/block/dataset.go rename to pkg/experiment/block/dataset.go diff --git a/pkg/experiment/block/metadata.go b/pkg/experiment/block/metadata.go new file mode 100644 index 0000000000..9693172604 --- /dev/null +++ b/pkg/experiment/block/metadata.go @@ -0,0 +1,161 @@ +package block + +import ( + "io" + "math/rand" + "sync" + "time" + + "github.com/cespare/xxhash/v2" + "github.com/oklog/ulid" + + metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" + "github.com/grafana/pyroscope/pkg/iter" +) + +func Tenant(md *metastorev1.BlockMeta) string { + if md.Tenant <= 0 || int(md.Tenant) >= len(md.StringTable) { + return "" + } + return md.StringTable[md.Tenant] +} + +func Timestamp(md *metastorev1.BlockMeta) time.Time { + return time.UnixMilli(int64(ulid.MustParse(md.Id).Time())) +} + +func SanitizeMetadata(md *metastorev1.BlockMeta) error { + // TODO(kolesnikovae): Implement. + _, err := ulid.Parse(md.Id) + return err +} + +var stringTablePool = sync.Pool{ + New: func() any { return NewMetadataStringTable() }, +} + +type MetadataStrings struct { + Dict map[string]int32 + Strings []string +} + +func NewMetadataStringTable() *MetadataStrings { + var empty string + return &MetadataStrings{ + Dict: map[string]int32{empty: 0}, + Strings: []string{empty}, + } +} + +func (t *MetadataStrings) IsEmpty() bool { + if len(t.Strings) == 0 { + return true + } + return len(t.Strings) == 1 && t.Strings[0] == "" +} + +func (t *MetadataStrings) Reset() { + clear(t.Dict) + t.Dict[""] = 0 + t.Strings[0] = "" + t.Strings = t.Strings[:1] +} + +func (t *MetadataStrings) Put(s string) int32 { + if i, ok := t.Dict[s]; ok { + return i + } + i := int32(len(t.Strings)) + t.Strings = append(t.Strings, s) + t.Dict[s] = i + return i +} + +func (t *MetadataStrings) Lookup(i int32) string { + if i < 0 || int(i) >= len(t.Strings) { + return "" + } + return t.Strings[i] +} + +// Import strings from the metadata entry and update the references. +func (t *MetadataStrings) Import(src *metastorev1.BlockMeta) { + if len(src.StringTable) < 2 { + return + } + // TODO: Pool? 
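+	// Build a lookup table from source string indices to indices in this
+	// table, then rewrite the tenant, created_by, dataset name, and profile
+	// type references in place.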
+ lut := make([]int32, len(src.StringTable)) + for i, s := range src.StringTable { + x := t.Put(s) + lut[i] = x + } + src.Tenant = lut[src.Tenant] + src.CreatedBy = lut[src.CreatedBy] + for _, ds := range src.Datasets { + ds.Tenant = lut[ds.Tenant] + ds.Name = lut[ds.Name] + for i, p := range ds.ProfileTypes { + ds.ProfileTypes[i] = lut[p] + } + } +} + +func (t *MetadataStrings) Export(dst *metastorev1.BlockMeta) { + n := stringTablePool.Get().(*MetadataStrings) + defer stringTablePool.Put(n) + dst.Tenant = n.Put(t.Lookup(dst.Tenant)) + dst.CreatedBy = n.Put(t.Lookup(dst.CreatedBy)) + for _, ds := range dst.Datasets { + ds.Tenant = n.Put(t.Lookup(ds.Tenant)) + ds.Name = n.Put(t.Lookup(ds.Name)) + for i := range ds.ProfileTypes { + ds.ProfileTypes[i] = n.Put(t.Lookup(ds.ProfileTypes[i])) + } + } + dst.StringTable = make([]string, len(n.Strings)) + copy(dst.StringTable, n.Strings) + n.Reset() +} + +func (t *MetadataStrings) Load(x iter.Iterator[string]) error { + for x.Next() { + t.Put(x.At()) + } + return x.Err() +} + +// ULIDGenerator generates deterministic ULIDs for blocks in an +// idempotent way: for the same set of objects, the generator +// will always produce the same set of ULIDs. +// +// We require block identifiers to be deterministic to ensure +// deduplication of the blocks. +type ULIDGenerator struct { + timestamp uint64 // Unix millis. + entropy io.Reader +} + +func NewULIDGenerator(objects Objects) *ULIDGenerator { + if len(objects) == 0 { + return &ULIDGenerator{ + timestamp: uint64(time.Now().UnixMilli()), + } + } + buf := make([]byte, 0, 1<<10) + for _, obj := range objects { + buf = append(buf, obj.meta.Id...) + } + seed := xxhash.Sum64(buf) + // Reference block. + // We're using its timestamp in all the generated ULIDs. + // Assuming that the first object is the oldest one. + r := objects[0] + return &ULIDGenerator{ + timestamp: ulid.MustParse(r.meta.Id).Time(), + entropy: rand.New(rand.NewSource(int64(seed))), + } +} + +func (g *ULIDGenerator) ULID() ulid.ULID { + return ulid.MustNew(g.timestamp, g.entropy) +} diff --git a/pkg/experiment/block/metadata_test.go b/pkg/experiment/block/metadata_test.go new file mode 100644 index 0000000000..0575cd80a2 --- /dev/null +++ b/pkg/experiment/block/metadata_test.go @@ -0,0 +1,114 @@ +package block + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" +) + +func TestMetadataStrings_Import(t *testing.T) { + md1 := &metastorev1.BlockMeta{ + Id: "block_id", + Tenant: 0, + CreatedBy: 1, + Datasets: []*metastorev1.Dataset{ + {Tenant: 2, Name: 3, ProfileTypes: []int32{4, 5, 6}}, + {Tenant: 7, Name: 8, ProfileTypes: []int32{5, 6, 9}}, + }, + StringTable: []string{ + "", "ingester", + "tenant-a", "dataset-a", "1", "2", "3", + "tenant-b", "dataset-b", "4", + }, + } + + table := NewMetadataStringTable() + md1c := md1.CloneVT() + table.Import(md1c) + assert.Equal(t, md1, md1c) + assert.Equal(t, table.Strings, md1.StringTable) + + // Exactly the same metadata. 
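+	// Importing a table identical to the destination is effectively a no-op:
+	// every string resolves to the index it already has, so no references
+	// change and no new strings are appended.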
+ md2 := md1.CloneVT() + table.Import(md2) + assert.Len(t, md2.StringTable, 10) + assert.Len(t, table.Strings, 10) + assert.Equal(t, table.Strings, md2.StringTable) + + md3 := &metastorev1.BlockMeta{ + Id: "block_id_3", + Tenant: 0, + CreatedBy: 1, + Datasets: []*metastorev1.Dataset{ + {Tenant: 2, Name: 3, ProfileTypes: []int32{4, 5, 6}}, + {Tenant: 7, Name: 8, ProfileTypes: []int32{4, 9}}, + }, + StringTable: []string{ + "", "ingester", + "tenant-a", "dataset-a", "1", "2", "3", + "tenant-c", "dataset-c", "5", + }, + } + + table.Import(md3) + expected := &metastorev1.BlockMeta{ + Id: "block_id_3", + Tenant: 0, + CreatedBy: 1, + Datasets: []*metastorev1.Dataset{ + {Tenant: 2, Name: 3, ProfileTypes: []int32{4, 5, 6}}, + {Tenant: 10, Name: 11, ProfileTypes: []int32{4, 12}}, + }, + StringTable: []string{ + "", "ingester", + "tenant-a", "dataset-a", "1", "2", "3", + "tenant-c", "dataset-c", "5", + }, + } + + assert.Equal(t, expected, md3) + assert.Len(t, table.Strings, 13) +} + +func TestMetadataStrings_Export(t *testing.T) { + table := NewMetadataStringTable() + for _, s := range []string{ + "", "x1", "x2", "x3", "x4", "x5", + "ingester", + "tenant-a", "dataset-a", "1", "2", "3", + "tenant-b", "dataset-b", "4", + } { + table.Put(s) + } + + md := &metastorev1.BlockMeta{ + Id: "1", + Tenant: 0, + CreatedBy: 6, + Datasets: []*metastorev1.Dataset{ + {Tenant: 7, Name: 8, ProfileTypes: []int32{9, 10, 11}}, + {Tenant: 12, Name: 13, ProfileTypes: []int32{10, 11, 14}}, + }, + } + + table.Export(md) + + expected := &metastorev1.BlockMeta{ + Id: "1", + Tenant: 0, + CreatedBy: 1, + Datasets: []*metastorev1.Dataset{ + {Tenant: 2, Name: 3, ProfileTypes: []int32{4, 5, 6}}, + {Tenant: 7, Name: 8, ProfileTypes: []int32{5, 6, 9}}, + }, + StringTable: []string{ + "", "ingester", + "tenant-a", "dataset-a", "1", "2", "3", + "tenant-b", "dataset-b", "4", + }, + } + + assert.Equal(t, expected, md) +} diff --git a/pkg/experiment/query_backend/block/object.go b/pkg/experiment/block/object.go similarity index 88% rename from pkg/experiment/query_backend/block/object.go rename to pkg/experiment/block/object.go index 95f2e0a81e..b900f667b3 100644 --- a/pkg/experiment/query_backend/block/object.go +++ b/pkg/experiment/block/object.go @@ -95,11 +95,11 @@ func WithObjectDownload(dir string) ObjectOption { } } -func NewObject(storage objstore.Bucket, meta *metastorev1.BlockMeta, opts ...ObjectOption) *Object { +func NewObject(storage objstore.Bucket, md *metastorev1.BlockMeta, opts ...ObjectOption) *Object { o := &Object{ storage: storage, - meta: meta, - path: ObjectPath(meta), + meta: md, + path: ObjectPath(md), memSize: defaultObjectSizeLoadInMemory, } for _, opt := range opts { @@ -109,18 +109,19 @@ func NewObject(storage objstore.Bucket, meta *metastorev1.BlockMeta, opts ...Obj } func ObjectPath(md *metastorev1.BlockMeta) string { - return BuildObjectPath(md.TenantId, md.Shard, md.CompactionLevel, md.Id) + return BuildObjectPath(Tenant(md), md.Shard, md.CompactionLevel, md.Id) } func BuildObjectPath(tenant string, shard uint32, level uint32, block string) string { - topLevel := DirPathBlock + topLevel := DirNameBlock tenantDirName := tenant if level == 0 { - topLevel = DirPathSegment + topLevel = DirNameSegment tenantDirName = DirNameAnonTenant } var b strings.Builder b.WriteString(topLevel) + b.WriteByte('/') b.WriteString(strconv.Itoa(int(shard))) b.WriteByte('/') b.WriteString(tenantDirName) @@ -131,6 +132,24 @@ func BuildObjectPath(tenant string, shard uint32, level uint32, block string) st return b.String() } +func 
MetadataDLQObjectPath(md *metastorev1.BlockMeta) string { + var b strings.Builder + tenantDirName := DirNameAnonTenant + if md.CompactionLevel > 0 { + tenantDirName = Tenant(md) + } + b.WriteString(DirNameDLQ) + b.WriteByte('/') + b.WriteString(strconv.Itoa(int(md.Shard))) + b.WriteByte('/') + b.WriteString(tenantDirName) + b.WriteByte('/') + b.WriteString(md.Id) + b.WriteByte('/') + b.WriteString(FileNameMetadataObject) + return b.String() +} + // Open opens the object, loading the data into memory if it's small enough. // // Open may be called multiple times concurrently, but the diff --git a/pkg/experiment/query_backend/block/section_profiles.go b/pkg/experiment/block/section_profiles.go similarity index 100% rename from pkg/experiment/query_backend/block/section_profiles.go rename to pkg/experiment/block/section_profiles.go diff --git a/pkg/experiment/query_backend/block/section_symbols.go b/pkg/experiment/block/section_symbols.go similarity index 100% rename from pkg/experiment/query_backend/block/section_symbols.go rename to pkg/experiment/block/section_symbols.go diff --git a/pkg/experiment/query_backend/block/section_tsdb.go b/pkg/experiment/block/section_tsdb.go similarity index 100% rename from pkg/experiment/query_backend/block/section_tsdb.go rename to pkg/experiment/block/section_tsdb.go diff --git a/pkg/experiment/query_backend/block/testdata/.gitignore b/pkg/experiment/block/testdata/.gitignore similarity index 100% rename from pkg/experiment/query_backend/block/testdata/.gitignore rename to pkg/experiment/block/testdata/.gitignore diff --git a/pkg/experiment/query_backend/block/testdata/block-metas.json b/pkg/experiment/block/testdata/block-metas.json similarity index 73% rename from pkg/experiment/query_backend/block/testdata/block-metas.json rename to pkg/experiment/block/testdata/block-metas.json index 9762eaf057..086191bd50 100644 --- a/pkg/experiment/query_backend/block/testdata/block-metas.json +++ b/pkg/experiment/block/testdata/block-metas.json @@ -7,8 +7,8 @@ "shard": 1, "datasets": [ { - "tenantId": "anonymous", - "name": "pyroscope-test/query-frontend", + "tenant": 1, + "name": 2, "minTime": "1721060035611", "maxTime": "1721060035611", "tableOfContents": [ @@ -18,6 +18,11 @@ ], "size": "19471" } + ], + "stringTable": [ + "", + "anonymous", + "pyroscope-test/query-frontend" ] }, { @@ -27,8 +32,8 @@ "shard": 1, "datasets": [ { - "tenantId": "anonymous", - "name": "pyroscope-test/ingester", + "tenant": 1, + "name": 2, "minTime": "1721060023235", "maxTime": "1721060023235", "tableOfContents": [ @@ -38,6 +43,11 @@ ], "size": "22242" } + ], + "stringTable": [ + "", + "anonymous", + "pyroscope-test/ingester" ] }, { @@ -47,8 +57,8 @@ "shard": 1, "datasets": [ { - "tenantId": "anonymous", - "name": "pyroscope-test/alloy", + "tenant": 1, + "name": 2, "minTime": "1721060010831", "maxTime": "1721060010831", "tableOfContents": [ @@ -58,6 +68,11 @@ ], "size": "17664" } + ], + "stringTable": [ + "", + "anonymous", + "pyroscope-test/alloy" ] }, { @@ -67,8 +82,8 @@ "shard": 1, "datasets": [ { - "tenantId": "anonymous", - "name": "pyroscope-test/query-frontend", + "tenant": 1, + "name": 2, "minTime": "1721060025159", "maxTime": "1721060025159", "tableOfContents": [ @@ -78,6 +93,11 @@ ], "size": "21765" } + ], + "stringTable": [ + "", + "anonymous", + "pyroscope-test/query-frontend" ] }, { @@ -87,8 +107,8 @@ "shard": 1, "datasets": [ { - "tenantId": "anonymous", - "name": "pyroscope-test/ingester", + "tenant": 1, + "name": 2, "minTime": "1721060026913", "maxTime": "1721060026913", 
"tableOfContents": [ @@ -98,6 +118,11 @@ ], "size": "28169" } + ], + "stringTable": [ + "", + "anonymous", + "pyroscope-test/ingester" ] }, { @@ -107,8 +132,8 @@ "shard": 1, "datasets": [ { - "tenantId": "anonymous", - "name": "pyroscope-test/query-frontend", + "tenant": 1, + "name": 2, "minTime": "1721060013534", "maxTime": "1721060013534", "tableOfContents": [ @@ -118,6 +143,11 @@ ], "size": "15785" } + ], + "stringTable": [ + "", + "anonymous", + "pyroscope-test/query-frontend" ] }, { @@ -127,8 +157,8 @@ "shard": 1, "datasets": [ { - "tenantId": "anonymous", - "name": "pyroscope-test/ingester", + "tenant": 1, + "name": 2, "minTime": "1721060015603", "maxTime": "1721060015603", "tableOfContents": [ @@ -138,6 +168,11 @@ ], "size": "27431" } + ], + "stringTable": [ + "", + "anonymous", + "pyroscope-test/ingester" ] }, { @@ -147,8 +182,8 @@ "shard": 1, "datasets": [ { - "tenantId": "anonymous", - "name": "pyroscope-test/ingester", + "tenant": 1, + "name": 2, "minTime": "1721060031203", "maxTime": "1721060031203", "tableOfContents": [ @@ -158,6 +193,11 @@ ], "size": "36655" } + ], + "stringTable": [ + "", + "anonymous", + "pyroscope-test/ingester" ] }, { @@ -167,8 +207,8 @@ "shard": 1, "datasets": [ { - "tenantId": "anonymous", - "name": "pyroscope-test/ingester", + "tenant": 1, + "name": 2, "minTime": "1721060032190", "maxTime": "1721060032190", "tableOfContents": [ @@ -178,6 +218,11 @@ ], "size": "24273" } + ], + "stringTable": [ + "", + "anonymous", + "pyroscope-test/ingester" ] }, { @@ -187,8 +232,8 @@ "shard": 1, "datasets": [ { - "tenantId": "anonymous", - "name": "pyroscope-test/alloy", + "tenant": 1, + "name": 2, "minTime": "1721060033248", "maxTime": "1721060033248", "tableOfContents": [ @@ -199,8 +244,8 @@ "size": "27397" }, { - "tenantId": "anonymous", - "name": "pyroscope-test/ingester", + "tenant": 1, + "name": 3, "minTime": "1721060033680", "maxTime": "1721060033802", "tableOfContents": [ @@ -210,6 +255,12 @@ ], "size": "50561" } + ], + "stringTable": [ + "", + "anonymous", + "pyroscope-test/alloy", + "pyroscope-test/ingester" ] } ] diff --git a/pkg/experiment/block/testdata/compacted.golden b/pkg/experiment/block/testdata/compacted.golden new file mode 100644 index 0000000000..aa4e180c4e --- /dev/null +++ b/pkg/experiment/block/testdata/compacted.golden @@ -0,0 +1,57 @@ +[ + { + "format_version": 1, + "id": "01J2VJR31P221PP7MVJSX7E468", + "tenant": 1, + "shard": 1, + "compaction_level": 1, + "min_time": 1721060010831, + "max_time": 1721060035611, + "size": 150042, + "datasets": [ + { + "tenant": 1, + "name": 4, + "min_time": 1721060010831, + "max_time": 1721060033248, + "table_of_contents": [ + 0, + 4689, + 8118 + ], + "size": 35057 + }, + { + "tenant": 1, + "name": 3, + "min_time": 1721060015603, + "max_time": 1721060033802, + "table_of_contents": [ + 35057, + 42688, + 49619 + ], + "size": 76886 + }, + { + "tenant": 1, + "name": 2, + "min_time": 1721060013534, + "max_time": 1721060035611, + "table_of_contents": [ + 111943, + 117040, + 121502 + ], + "size": 38099 + } + ], + "string_table": [ + "", + "anonymous", + "pyroscope-test/query-frontend", + "pyroscope-test/ingester", + "pyroscope-test/alloy" + ] + } +] \ No newline at end of file diff --git a/pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJQPYDC160REPAD2VN88XN/block.bin b/pkg/experiment/block/testdata/segments/1/anonymous/01J2VJQPYDC160REPAD2VN88XN/block.bin similarity index 100% rename from pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJQPYDC160REPAD2VN88XN/block.bin 
rename to pkg/experiment/block/testdata/segments/1/anonymous/01J2VJQPYDC160REPAD2VN88XN/block.bin diff --git a/pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJQRGBK8YFWVV8K1MPRRWM/block.bin b/pkg/experiment/block/testdata/segments/1/anonymous/01J2VJQRGBK8YFWVV8K1MPRRWM/block.bin similarity index 100% rename from pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJQRGBK8YFWVV8K1MPRRWM/block.bin rename to pkg/experiment/block/testdata/segments/1/anonymous/01J2VJQRGBK8YFWVV8K1MPRRWM/block.bin diff --git a/pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJQRTMSCY4VDYBP5N4N5JK/block.bin b/pkg/experiment/block/testdata/segments/1/anonymous/01J2VJQRTMSCY4VDYBP5N4N5JK/block.bin similarity index 100% rename from pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJQRTMSCY4VDYBP5N4N5JK/block.bin rename to pkg/experiment/block/testdata/segments/1/anonymous/01J2VJQRTMSCY4VDYBP5N4N5JK/block.bin diff --git a/pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJQTJ3PGF7KB39ARR1BX3Y/block.bin b/pkg/experiment/block/testdata/segments/1/anonymous/01J2VJQTJ3PGF7KB39ARR1BX3Y/block.bin similarity index 100% rename from pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJQTJ3PGF7KB39ARR1BX3Y/block.bin rename to pkg/experiment/block/testdata/segments/1/anonymous/01J2VJQTJ3PGF7KB39ARR1BX3Y/block.bin diff --git a/pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJQV544TF571FDSK2H692P/block.bin b/pkg/experiment/block/testdata/segments/1/anonymous/01J2VJQV544TF571FDSK2H692P/block.bin similarity index 100% rename from pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJQV544TF571FDSK2H692P/block.bin rename to pkg/experiment/block/testdata/segments/1/anonymous/01J2VJQV544TF571FDSK2H692P/block.bin diff --git a/pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJQX8DYHSEBK7BAQSCJBMG/block.bin b/pkg/experiment/block/testdata/segments/1/anonymous/01J2VJQX8DYHSEBK7BAQSCJBMG/block.bin similarity index 100% rename from pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJQX8DYHSEBK7BAQSCJBMG/block.bin rename to pkg/experiment/block/testdata/segments/1/anonymous/01J2VJQX8DYHSEBK7BAQSCJBMG/block.bin diff --git a/pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJQYQVZTPZMMJKE7F2XC47/block.bin b/pkg/experiment/block/testdata/segments/1/anonymous/01J2VJQYQVZTPZMMJKE7F2XC47/block.bin similarity index 100% rename from pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJQYQVZTPZMMJKE7F2XC47/block.bin rename to pkg/experiment/block/testdata/segments/1/anonymous/01J2VJQYQVZTPZMMJKE7F2XC47/block.bin diff --git a/pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJQZPARDJQ779S1JMV0XQA/block.bin b/pkg/experiment/block/testdata/segments/1/anonymous/01J2VJQZPARDJQ779S1JMV0XQA/block.bin similarity index 100% rename from pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJQZPARDJQ779S1JMV0XQA/block.bin rename to pkg/experiment/block/testdata/segments/1/anonymous/01J2VJQZPARDJQ779S1JMV0XQA/block.bin diff --git a/pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJR0R3NQS23SDADNA6XHCM/block.bin b/pkg/experiment/block/testdata/segments/1/anonymous/01J2VJR0R3NQS23SDADNA6XHCM/block.bin similarity index 100% rename from pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJR0R3NQS23SDADNA6XHCM/block.bin rename to 
pkg/experiment/block/testdata/segments/1/anonymous/01J2VJR0R3NQS23SDADNA6XHCM/block.bin diff --git a/pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJR31PT3X4NDJC4Q2BHWQ1/block.bin b/pkg/experiment/block/testdata/segments/1/anonymous/01J2VJR31PT3X4NDJC4Q2BHWQ1/block.bin similarity index 100% rename from pkg/experiment/query_backend/block/testdata/segments/1/anonymous/01J2VJR31PT3X4NDJC4Q2BHWQ1/block.bin rename to pkg/experiment/block/testdata/segments/1/anonymous/01J2VJR31PT3X4NDJC4Q2BHWQ1/block.bin diff --git a/pkg/experiment/query_backend/block/writer.go b/pkg/experiment/block/writer.go similarity index 100% rename from pkg/experiment/query_backend/block/writer.go rename to pkg/experiment/block/writer.go diff --git a/pkg/experiment/compactor/compaction_worker.go b/pkg/experiment/compactor/compaction_worker.go index 2e85fcd80d..c03bd5bcb9 100644 --- a/pkg/experiment/compactor/compaction_worker.go +++ b/pkg/experiment/compactor/compaction_worker.go @@ -16,7 +16,6 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/grafana/dskit/services" - "github.com/oklog/ulid" "github.com/opentracing/opentracing-go" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" @@ -24,7 +23,7 @@ import ( "golang.org/x/sync/errgroup" metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" - "github.com/grafana/pyroscope/pkg/experiment/query_backend/block" + "github.com/grafana/pyroscope/pkg/experiment/block" "github.com/grafana/pyroscope/pkg/objstore" "github.com/grafana/pyroscope/pkg/util" ) @@ -381,12 +380,12 @@ func (w *Worker) runCompaction(job *compactionJob) { level.Info(logger).Log( "msg", "new compacted block", "block_id", c.Id, - "block_tenant", c.TenantId, + "block_tenant", block.Tenant(c), "block_shard", c.Shard, - "block_size", c.Size, "block_compaction_level", c.CompactionLevel, "block_min_time", c.MinTime, "block_max_time", c.MinTime, + "block_size", c.Size, "datasets", len(c.Datasets), ) } @@ -401,7 +400,7 @@ func (w *Worker) runCompaction(job *compactionJob) { }, } - firstBlock := time.UnixMilli(int64(ulid.MustParse(job.blocks[0].Id).Time())) + firstBlock := block.Timestamp(job.blocks[0]) w.metrics.timeToCompaction.WithLabelValues(labels...).Observe(time.Since(firstBlock).Seconds()) case errors.Is(err, context.Canceled): diff --git a/pkg/experiment/distributor/placement/adaptive_placement/distribution_stats.go b/pkg/experiment/distributor/placement/adaptive_placement/distribution_stats.go index be41f536c6..89a9d26863 100644 --- a/pkg/experiment/distributor/placement/adaptive_placement/distribution_stats.go +++ b/pkg/experiment/distributor/placement/adaptive_placement/distribution_stats.go @@ -97,11 +97,8 @@ func (k counterKey) compare(x counterKey) int { if c := strings.Compare(k.dataset, x.dataset); c != 0 { return c } - if k.shard.id < x.shard.id { - return -1 - } - if k.shard.id > x.shard.id { - return 1 + if k.shard.id != x.shard.id { + return int(k.shard.id) - int(x.shard.id) } return strings.Compare(k.shard.owner, x.shard.owner) } diff --git a/pkg/experiment/ingester/segment.go b/pkg/experiment/ingester/segment.go index 92f4aa6784..414983c29e 100644 --- a/pkg/experiment/ingester/segment.go +++ b/pkg/experiment/ingester/segment.go @@ -21,8 +21,8 @@ import ( profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" + 
"github.com/grafana/pyroscope/pkg/experiment/block" "github.com/grafana/pyroscope/pkg/experiment/ingester/memdb" - segmentstorage "github.com/grafana/pyroscope/pkg/experiment/ingester/storage" "github.com/grafana/pyroscope/pkg/model" pprofsplit "github.com/grafana/pyroscope/pkg/model/pprof_split" pprofmodel "github.com/grafana/pyroscope/pkg/pprof" @@ -238,17 +238,19 @@ func (s *segment) flush(ctx context.Context) (err error) { func (s *segment) flushBlock(heads []flushedServiceHead) ([]byte, *metastorev1.BlockMeta, error) { t1 := time.Now() hostname, _ := os.Hostname() + + stringTable := block.NewMetadataStringTable() meta := &metastorev1.BlockMeta{ FormatVersion: 1, Id: s.ulid.String(), - MinTime: 0, - MaxTime: 0, + Tenant: 0, Shard: uint32(s.shard), CompactionLevel: 0, - TenantId: "", - Datasets: make([]*metastorev1.Dataset, 0, len(heads)), + CreatedBy: stringTable.Put(hostname), + MinTime: 0, + MaxTime: 0, Size: 0, - CreatedBy: hostname, + Datasets: make([]*metastorev1.Dataset, 0, len(heads)), } blockFile := bytes.NewBuffer(nil) @@ -256,7 +258,7 @@ func (s *segment) flushBlock(heads []flushedServiceHead) ([]byte, *metastorev1.B w := withWriterOffset(blockFile) for i, e := range heads { - svc, err := concatSegmentHead(e, w) + svc, err := concatSegmentHead(e, w, stringTable) if err != nil { _ = level.Error(s.logger).Log("msg", "failed to concat segment head", "err", err) continue @@ -272,12 +274,13 @@ func (s *segment) flushBlock(heads []flushedServiceHead) ([]byte, *metastorev1.B meta.Datasets = append(meta.Datasets, svc) } + meta.StringTable = stringTable.Strings meta.Size = uint64(w.offset) s.debuginfo.flushBlockDuration = time.Since(t1) return blockFile.Bytes(), meta, nil } -func concatSegmentHead(e flushedServiceHead, w *writerOffset) (*metastorev1.Dataset, error) { +func concatSegmentHead(e flushedServiceHead, w *writerOffset, s *block.MetadataStrings) (*metastorev1.Dataset, error) { tenantServiceOffset := w.offset ptypes := e.head.Meta.ProfileTypeNames @@ -296,17 +299,21 @@ func concatSegmentHead(e flushedServiceHead, w *writerOffset) (*metastorev1.Data tenantServiceSize := w.offset - tenantServiceOffset svc := &metastorev1.Dataset{ - TenantId: e.key.tenant, - Name: e.key.service, - MinTime: e.head.Meta.MinTimeNanos / 1e6, - MaxTime: e.head.Meta.MaxTimeNanos / 1e6, - Size: uint64(tenantServiceSize), + Tenant: s.Put(e.key.tenant), + Name: s.Put(e.key.service), + MinTime: e.head.Meta.MinTimeNanos / 1e6, + MaxTime: e.head.Meta.MaxTimeNanos / 1e6, + Size: uint64(tenantServiceSize), // - 0: profiles.parquet // - 1: index.tsdb // - 2: symbols.symdb TableOfContents: offsets, - ProfileTypes: ptypes, + ProfileTypes: make([]int32, len(ptypes)), } + for i, p := range ptypes { + svc.ProfileTypes[i] = s.Put(p) + } + return svc, nil } @@ -512,7 +519,7 @@ func (sw *segmentsWriter) uploadBlock(ctx context.Context, blockData []byte, met }() sw.metrics.segmentBlockSizeBytes.WithLabelValues(s.sshard).Observe(float64(len(blockData))) - blockPath := segmentstorage.PathForSegment(meta) + blockPath := block.ObjectPath(meta) if err := sw.bucket.Upload(ctx, blockPath, bytes.NewReader(blockData)); err != nil { return err @@ -540,7 +547,7 @@ func (sw *segmentsWriter) storeMetaDLQ(ctx context.Context, meta *metastorev1.Bl sw.metrics.storeMetaDLQ.WithLabelValues(s.sshard, "err").Inc() return err } - fullPath := segmentstorage.PathForDLQ(meta) + fullPath := block.MetadataDLQObjectPath(meta) if err = sw.bucket.Upload(ctx, fullPath, bytes.NewReader(metaBlob)); err != nil { 
sw.metrics.storeMetaDLQ.WithLabelValues(s.sshard, "err").Inc() return fmt.Errorf("%w, %w", ErrMetastoreDLQFailed, err) diff --git a/pkg/experiment/ingester/segment_test.go b/pkg/experiment/ingester/segment_test.go index 06f2a053b3..5b5806fea7 100644 --- a/pkg/experiment/ingester/segment_test.go +++ b/pkg/experiment/ingester/segment_test.go @@ -10,24 +10,27 @@ import ( "slices" "strings" "sync" + "sync/atomic" "testing" "time" - "github.com/grafana/dskit/flagext" - - "github.com/grafana/pyroscope/pkg/experiment/metastore" - "github.com/grafana/pyroscope/pkg/test/mocks/mockdlq" - gprofile "github.com/google/pprof/profile" + "github.com/grafana/dskit/flagext" + model2 "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/util/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" ingesterv1 "github.com/grafana/pyroscope/api/gen/proto/go/ingester/v1" "github.com/grafana/pyroscope/api/gen/proto/go/ingester/v1/ingesterv1connect" metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" + "github.com/grafana/pyroscope/pkg/experiment/block" "github.com/grafana/pyroscope/pkg/experiment/ingester/memdb" testutil2 "github.com/grafana/pyroscope/pkg/experiment/ingester/memdb/testutil" - segmentstorage "github.com/grafana/pyroscope/pkg/experiment/ingester/storage" + "github.com/grafana/pyroscope/pkg/experiment/metastore" "github.com/grafana/pyroscope/pkg/experiment/metastore/dlq" metastoretest "github.com/grafana/pyroscope/pkg/experiment/metastore/test" "github.com/grafana/pyroscope/pkg/model" @@ -37,16 +40,10 @@ import ( "github.com/grafana/pyroscope/pkg/phlaredb" testutil3 "github.com/grafana/pyroscope/pkg/phlaredb/block/testutil" pprofth "github.com/grafana/pyroscope/pkg/pprof/testhelper" + "github.com/grafana/pyroscope/pkg/test/mocks/mockdlq" "github.com/grafana/pyroscope/pkg/test/mocks/mockmetastorev1" "github.com/grafana/pyroscope/pkg/test/mocks/mockobjstore" "github.com/grafana/pyroscope/pkg/validation" - - model2 "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/util/testutil" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" - "go.uber.org/atomic" ) func TestSegmentIngest(t *testing.T) { @@ -155,11 +152,11 @@ func TestBusyIngestLoop(t *testing.T) { readCtx, readCancel := context.WithCancel(context.Background()) metaChan := make(chan *metastorev1.BlockMeta) defer sw.Stop() - cnt := atomic.NewInt32(0) + var cnt atomic.Int32 sw.client.On("AddBlock", mock.Anything, mock.Anything, mock.Anything). 
Run(func(args mock.Arguments) { metaChan <- args.Get(1).(*metastorev1.AddBlockRequest).Block - if cnt.Inc() == 3 { + if cnt.Add(1) == 3 { writeCancel() } }).Return(new(metastorev1.AddBlockResponse), nil) @@ -234,10 +231,10 @@ func TestDLQFail(t *testing.T) { l := testutil.NewLogger(t) bucket := mockobjstore.NewMockBucket(t) bucket.On("Upload", mock.Anything, mock.MatchedBy(func(name string) bool { - return segmentstorage.IsSegmentPath(name) + return isSegmentPath(name) }), mock.Anything).Return(nil) bucket.On("Upload", mock.Anything, mock.MatchedBy(func(name string) bool { - return segmentstorage.IsDLQPath(name) + return isDLQPath(name) }), mock.Anything).Return(fmt.Errorf("mock upload DLQ error")) client := mockmetastorev1.NewMockIndexServiceClient(t) client.On("AddBlock", mock.Anything, mock.Anything, mock.Anything). @@ -501,19 +498,20 @@ func defaultTestConfig() Config { func (sw *sw) createBlocksFromMetas(blocks []*metastorev1.BlockMeta) tenantClients { dir := sw.t.TempDir() for _, meta := range blocks { - blobReader, err := sw.bucket.Get(context.Background(), segmentstorage.PathForSegment(meta)) + blobReader, err := sw.bucket.Get(context.Background(), block.ObjectPath(meta)) require.NoError(sw.t, err) blob, err := io.ReadAll(blobReader) require.NoError(sw.t, err) - for _, ts := range meta.Datasets { - profiles := blob[ts.TableOfContents[0]:ts.TableOfContents[1]] - tsdb := blob[ts.TableOfContents[1]:ts.TableOfContents[2]] - symbols := blob[ts.TableOfContents[2] : ts.TableOfContents[0]+ts.Size] + for _, ds := range meta.Datasets { + tenant := meta.StringTable[ds.Tenant] + profiles := blob[ds.TableOfContents[0]:ds.TableOfContents[1]] + tsdb := blob[ds.TableOfContents[1]:ds.TableOfContents[2]] + symbols := blob[ds.TableOfContents[2] : ds.TableOfContents[0]+ds.Size] testutil3.CreateBlockFromMemory(sw.t, - filepath.Join(dir, ts.TenantId), - model2.TimeFromUnixNano(ts.MinTime*1e6), //todo do not use 1e6, add comments to minTime clarifying the unit - model2.TimeFromUnixNano(ts.MaxTime*1e6), + filepath.Join(dir, tenant), + model2.TimeFromUnixNano(ds.MinTime*1e6), //todo do not use 1e6, add comments to minTime clarifying the unit + model2.TimeFromUnixNano(ds.MaxTime*1e6), profiles, tsdb, symbols, @@ -524,10 +522,10 @@ func (sw *sw) createBlocksFromMetas(blocks []*metastorev1.BlockMeta) tenantClien res := make(tenantClients) for _, meta := range blocks { for _, ds := range meta.Datasets { - tenant := ds.TenantId + tenant := meta.StringTable[ds.Tenant] if _, ok := res[tenant]; !ok { // todo consider not using BlockQuerier for tests - blockBucket, err := filesystem.NewBucket(filepath.Join(dir, ds.TenantId)) + blockBucket, err := filesystem.NewBucket(filepath.Join(dir, tenant)) blockQuerier := phlaredb.NewBlockQuerier(context.Background(), blockBucket) err = blockQuerier.Sync(context.Background()) @@ -655,9 +653,8 @@ func (sw *sw) getMetadataDLQ() []*metastorev1.BlockMeta { objects := sw.bucket.Objects() dlqFiles := []string{} for s := range objects { - if segmentstorage.IsDLQPath(s) { + if isDLQPath(s) { dlqFiles = append(dlqFiles, s) - } else { } } slices.Sort(dlqFiles) @@ -924,3 +921,19 @@ func (g *timestampGenerator) next() int { } } } + +func isDLQPath(p string) bool { + fs := strings.Split(p, "/") + return len(fs) == 5 && + fs[0] == block.DirNameDLQ && + fs[2] == block.DirNameAnonTenant && + fs[4] == block.FileNameMetadataObject +} + +func isSegmentPath(p string) bool { + fs := strings.Split(p, "/") + return len(fs) == 5 && + fs[0] == block.DirNameSegment && + fs[2] == block.DirNameAnonTenant 
&& + fs[4] == block.FileNameDataObject +} diff --git a/pkg/experiment/ingester/storage/path.go b/pkg/experiment/ingester/storage/path.go deleted file mode 100644 index b2d87fdd2c..0000000000 --- a/pkg/experiment/ingester/storage/path.go +++ /dev/null @@ -1,34 +0,0 @@ -package storage - -import ( - "fmt" - metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" - "github.com/grafana/pyroscope/pkg/tenant" - "path" - "strings" -) - -const PathDLQ = "dlq" - -const pathSegments = "segments" -const pathAnon = tenant.DefaultTenantID -const pathBlock = "block.bin" -const pathMetaPB = "meta.pb" - -func PathForDLQ(meta *metastorev1.BlockMeta) string { - return path.Join(PathDLQ, fmt.Sprintf("%d", meta.Shard), pathAnon, meta.Id, pathMetaPB) -} - -func PathForSegment(meta *metastorev1.BlockMeta) string { - return path.Join(pathSegments, fmt.Sprintf("%d", meta.Shard), pathAnon, meta.Id, pathBlock) -} - -func IsDLQPath(p string) bool { - fs := strings.Split(p, "/") - return len(fs) == 5 && fs[0] == PathDLQ && fs[2] == pathAnon && fs[4] == pathMetaPB -} - -func IsSegmentPath(p string) bool { - fs := strings.Split(p, "/") - return len(fs) == 5 && fs[0] == pathSegments && fs[2] == pathAnon && fs[4] == pathBlock -} diff --git a/pkg/experiment/metastore/compaction/compaction.go b/pkg/experiment/metastore/compaction/compaction.go index 626acc7792..71def608d9 100644 --- a/pkg/experiment/metastore/compaction/compaction.go +++ b/pkg/experiment/metastore/compaction/compaction.go @@ -6,11 +6,12 @@ import ( metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1/raft_log" + "github.com/grafana/pyroscope/pkg/experiment/block" ) type Compactor interface { // Compact enqueues a new block for compaction - Compact(*bbolt.Tx, *raft.Log, *metastorev1.BlockMeta) error + Compact(*bbolt.Tx, BlockEntry) error } type Planner interface { @@ -18,10 +19,10 @@ type Planner interface { // submitted for Raft consensus, with the leader's jobs being accepted // as the final decision. // Implementation: Plan must not change the state of the Planner. - NewPlan(*bbolt.Tx, *raft.Log) Plan + NewPlan(*raft.Log) Plan // UpdatePlan communicates the status of the compaction job to the planner. // Implementation: This method must be idempotent. - UpdatePlan(*bbolt.Tx, *raft.Log, *raft_log.CompactionPlanUpdate) error + UpdatePlan(*bbolt.Tx, *raft_log.CompactionPlanUpdate) error } type Plan interface { @@ -37,7 +38,7 @@ type Scheduler interface { NewSchedule(*bbolt.Tx, *raft.Log) Schedule // UpdateSchedule adds new jobs and updates the state of existing ones. // Implementation: This method must be idempotent. - UpdateSchedule(*bbolt.Tx, *raft.Log, *raft_log.CompactionPlanUpdate) error + UpdateSchedule(*bbolt.Tx, *raft_log.CompactionPlanUpdate) error } // Schedule prepares changes to the compaction plan based on status updates @@ -58,3 +59,24 @@ type Schedule interface { // The scheduler may decline the job by returning a nil state. AddJob(*raft_log.CompactionJobPlan) *raft_log.CompactionJobState } + +// BlockEntry represents a block metadata entry compaction operates on. 
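+// The entry is self-contained: Tenant holds the resolved tenant name rather
+// than a string-table index, so the compaction queue and its store never
+// need access to the block's string table.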
+type BlockEntry struct { + Index uint64 + AppendedAt int64 + ID string + Tenant string + Shard uint32 + Level uint32 +} + +func NewBlockEntry(cmd *raft.Log, md *metastorev1.BlockMeta) BlockEntry { + return BlockEntry{ + Index: cmd.Index, + AppendedAt: cmd.AppendedAt.UnixNano(), + ID: md.Id, + Tenant: block.Tenant(md), + Shard: md.Shard, + Level: md.CompactionLevel, + } +} diff --git a/pkg/experiment/metastore/compaction/compactor/compaction_queue.go b/pkg/experiment/metastore/compaction/compactor/compaction_queue.go index 701510b8f5..9fade46d2f 100644 --- a/pkg/experiment/metastore/compaction/compactor/compaction_queue.go +++ b/pkg/experiment/metastore/compaction/compactor/compaction_queue.go @@ -8,7 +8,7 @@ import ( "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/pyroscope/pkg/experiment/metastore/compaction/compactor/store" + "github.com/grafana/pyroscope/pkg/experiment/metastore/compaction" "github.com/grafana/pyroscope/pkg/util" ) @@ -118,7 +118,7 @@ func (q *compactionQueue) reset() { q.levels = q.levels[:0] } -func (q *compactionQueue) push(e store.BlockEntry) bool { +func (q *compactionQueue) push(e compaction.BlockEntry) bool { level := q.blockQueue(e.Level) staged := level.stagedBlocks(compactionKey{ tenant: e.Tenant, diff --git a/pkg/experiment/metastore/compaction/compactor/compaction_queue_bench_test.go b/pkg/experiment/metastore/compaction/compactor/compaction_queue_bench_test.go index d2ebc97b16..e17581058a 100644 --- a/pkg/experiment/metastore/compaction/compactor/compaction_queue_bench_test.go +++ b/pkg/experiment/metastore/compaction/compactor/compaction_queue_bench_test.go @@ -5,7 +5,7 @@ import ( "testing" "time" - "github.com/grafana/pyroscope/pkg/experiment/metastore/compaction/compactor/store" + "github.com/grafana/pyroscope/pkg/experiment/metastore/compaction" ) func BenchmarkCompactionQueue_Push(b *testing.B) { @@ -42,7 +42,7 @@ func BenchmarkCompactionQueue_Push(b *testing.B) { for i := 0; i < b.N; i++ { for j, key := range keys { - q.push(store.BlockEntry{ + q.push(compaction.BlockEntry{ Index: uint64(j), AppendedAt: writes[j], ID: strconv.Itoa(j), diff --git a/pkg/experiment/metastore/compaction/compactor/compaction_queue_test.go b/pkg/experiment/metastore/compaction/compactor/compaction_queue_test.go index 47ff081750..150fd43cfd 100644 --- a/pkg/experiment/metastore/compaction/compactor/compaction_queue_test.go +++ b/pkg/experiment/metastore/compaction/compactor/compaction_queue_test.go @@ -9,7 +9,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/grafana/pyroscope/pkg/experiment/metastore/compaction/compactor/store" + "github.com/grafana/pyroscope/pkg/experiment/metastore/compaction" ) func testBlockEntry(id int) blockEntry { return blockEntry{id: strconv.Itoa(id)} } @@ -171,7 +171,7 @@ func TestBlockQueue_FlushByAge(t *testing.T) { } c := newCompactionQueue(s, nil) - for _, e := range []store.BlockEntry{ + for _, e := range []compaction.BlockEntry{ {Tenant: "A", Shard: 1, Level: 1, Index: 1, AppendedAt: 5, ID: "1"}, {Tenant: "A", Shard: 1, Level: 1, Index: 2, AppendedAt: 15, ID: "2"}, {Tenant: "A", Shard: 0, Level: 1, Index: 3, AppendedAt: 30, ID: "3"}, diff --git a/pkg/experiment/metastore/compaction/compactor/compactor.go b/pkg/experiment/metastore/compaction/compactor/compactor.go index 7e6e0eac1f..3b2b588ec0 100644 --- a/pkg/experiment/metastore/compaction/compactor/compactor.go +++ b/pkg/experiment/metastore/compaction/compactor/compactor.go @@ -25,9 +25,9 @@ type Tombstones 
interface { } type BlockQueueStore interface { - StoreEntry(*bbolt.Tx, store.BlockEntry) error + StoreEntry(*bbolt.Tx, compaction.BlockEntry) error DeleteEntry(tx *bbolt.Tx, index uint64, id string) error - ListEntries(*bbolt.Tx) iter.Iterator[store.BlockEntry] + ListEntries(*bbolt.Tx) iter.Iterator[compaction.BlockEntry] CreateBuckets(*bbolt.Tx) error } @@ -66,30 +66,22 @@ func NewStore() *store.BlockQueueStore { return store.NewBlockQueueStore() } -func (c *Compactor) Compact(tx *bbolt.Tx, cmd *raft.Log, md *metastorev1.BlockMeta) error { - if uint(md.CompactionLevel) >= c.config.MaxLevel { +func (c *Compactor) Compact(tx *bbolt.Tx, entry compaction.BlockEntry) error { + if uint(entry.Level) >= c.config.MaxLevel { return nil } - e := store.BlockEntry{ - Index: cmd.Index, - AppendedAt: cmd.AppendedAt.UnixNano(), - ID: md.Id, - Shard: md.Shard, - Level: md.CompactionLevel, - Tenant: md.TenantId, - } - if err := c.store.StoreEntry(tx, e); err != nil { + if err := c.store.StoreEntry(tx, entry); err != nil { return err } - c.enqueue(e) + c.enqueue(entry) return nil } -func (c *Compactor) enqueue(e store.BlockEntry) bool { +func (c *Compactor) enqueue(e compaction.BlockEntry) bool { return c.queue.push(e) } -func (c *Compactor) NewPlan(_ *bbolt.Tx, cmd *raft.Log) compaction.Plan { +func (c *Compactor) NewPlan(cmd *raft.Log) compaction.Plan { before := cmd.AppendedAt.Add(-c.config.CleanupDelay) tombstones := c.tombstones.ListTombstones(before) return &plan{ @@ -99,7 +91,7 @@ func (c *Compactor) NewPlan(_ *bbolt.Tx, cmd *raft.Log) compaction.Plan { } } -func (c *Compactor) UpdatePlan(tx *bbolt.Tx, _ *raft.Log, plan *raft_log.CompactionPlanUpdate) error { +func (c *Compactor) UpdatePlan(tx *bbolt.Tx, plan *raft_log.CompactionPlanUpdate) error { for _, job := range plan.NewJobs { // Delete source blocks from the compaction queue. 
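// Once a job is created, its source blocks must leave the queue so that
// subsequent plans do not pick them up again.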
k := compactionKey{ diff --git a/pkg/experiment/metastore/compaction/compactor/compactor_test.go b/pkg/experiment/metastore/compaction/compactor/compactor_test.go index f0e91b16b8..54a2a50012 100644 --- a/pkg/experiment/metastore/compaction/compactor/compactor_test.go +++ b/pkg/experiment/metastore/compaction/compactor/compactor_test.go @@ -12,7 +12,7 @@ import ( metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1/raft_log" - "github.com/grafana/pyroscope/pkg/experiment/metastore/compaction/compactor/store" + "github.com/grafana/pyroscope/pkg/experiment/metastore/compaction" "github.com/grafana/pyroscope/pkg/iter" "github.com/grafana/pyroscope/pkg/test" "github.com/grafana/pyroscope/pkg/test/mocks/mockcompactor" @@ -22,14 +22,18 @@ func TestCompactor_Compact(t *testing.T) { queueStore := new(mockcompactor.MockBlockQueueStore) tombstones := new(mockcompactor.MockTombstones) - md := &metastorev1.BlockMeta{TenantId: "A", Shard: 0, CompactionLevel: 0, Id: "1"} - cmd := &raft.Log{Index: uint64(1), AppendedAt: time.Unix(0, 0)} - compactor := NewCompactor(testConfig, queueStore, tombstones, nil) + e := compaction.BlockEntry{ + Index: 1, + AppendedAt: time.Unix(0, 0).UnixNano(), + ID: "1", + Tenant: "A", + } + compactor := NewCompactor(testConfig, queueStore, tombstones, nil) testErr := errors.New("x") t.Run("fails if cannot store the entry", test.AssertIdempotentSubtest(t, func(t *testing.T) { queueStore.On("StoreEntry", mock.Anything, mock.Anything).Return(testErr) - require.ErrorIs(t, compactor.Compact(nil, cmd, md), testErr) + require.ErrorIs(t, compactor.Compact(nil, e), testErr) })) queueStore.AssertExpectations(t) @@ -47,9 +51,12 @@ func TestCompactor_UpdatePlan(t *testing.T) { compactor := NewCompactor(testConfig, queueStore, tombstones, nil) now := time.Unix(0, 0) for i := 0; i < N; i++ { - cmd := &raft.Log{Index: uint64(1), AppendedAt: now} - md := &metastorev1.BlockMeta{TenantId: "A", Shard: 0, CompactionLevel: 0, Id: strconv.Itoa(i)} - err := compactor.Compact(nil, cmd, md) + err := compactor.Compact(nil, compaction.BlockEntry{ + Index: 1, + AppendedAt: now.UnixNano(), + ID: strconv.Itoa(i), + Tenant: "A", + }) require.NoError(t, err) } @@ -58,7 +65,7 @@ func TestCompactor_UpdatePlan(t *testing.T) { tombstones.On("ListTombstones", mock.Anything). 
Return(iter.NewEmptyIterator[*metastorev1.Tombstones](), nil) - planner := compactor.NewPlan(nil, &raft.Log{Index: uint64(2), AppendedAt: now}) + planner := compactor.NewPlan(&raft.Log{Index: uint64(2), AppendedAt: now}) for i := range planned { job, err := planner.CreateJob() require.NoError(t, err) @@ -83,10 +90,9 @@ func TestCompactor_UpdatePlan(t *testing.T) { } update := &raft_log.CompactionPlanUpdate{NewJobs: newJobs} - cmd := &raft.Log{Index: uint64(2), AppendedAt: now} - require.NoError(t, compactor.UpdatePlan(nil, cmd, update)) + require.NoError(t, compactor.UpdatePlan(nil, update)) - planner := compactor.NewPlan(nil, &raft.Log{Index: uint64(3), AppendedAt: now}) + planner := compactor.NewPlan(&raft.Log{Index: uint64(3), AppendedAt: now}) job, err := planner.CreateJob() require.NoError(t, err) require.Nil(t, job) @@ -98,7 +104,7 @@ func TestCompactor_UpdatePlan(t *testing.T) { func TestCompactor_Restore(t *testing.T) { queueStore := new(mockcompactor.MockBlockQueueStore) - queueStore.On("ListEntries", mock.Anything).Return(iter.NewSliceIterator([]store.BlockEntry{ + queueStore.On("ListEntries", mock.Anything).Return(iter.NewSliceIterator([]compaction.BlockEntry{ {Index: 0, ID: "0", Tenant: "A"}, {Index: 1, ID: "1", Tenant: "A"}, {Index: 2, ID: "2", Tenant: "A"}, @@ -112,7 +118,7 @@ func TestCompactor_Restore(t *testing.T) { compactor := NewCompactor(testConfig, queueStore, tombstones, nil) require.NoError(t, compactor.Restore(nil)) - planner := compactor.NewPlan(nil, new(raft.Log)) + planner := compactor.NewPlan(new(raft.Log)) planned, err := planner.CreateJob() require.NoError(t, err) require.NotEmpty(t, planned) diff --git a/pkg/experiment/metastore/compaction/compactor/metrics_test.go b/pkg/experiment/metastore/compaction/compactor/metrics_test.go index cfcb30d482..1f96198e0a 100644 --- a/pkg/experiment/metastore/compaction/compactor/metrics_test.go +++ b/pkg/experiment/metastore/compaction/compactor/metrics_test.go @@ -5,13 +5,13 @@ import ( "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/pyroscope/pkg/experiment/metastore/compaction/compactor/store" + "github.com/grafana/pyroscope/pkg/experiment/metastore/compaction" ) func TestCollectorRegistration(t *testing.T) { reg := prometheus.NewRegistry() for i := 0; i < 2; i++ { - entries := []store.BlockEntry{ + entries := []compaction.BlockEntry{ {Tenant: "A", Shard: 0, Level: 0}, {Tenant: "A", Shard: 0, Level: 1}, {Tenant: "A", Shard: 0, Level: 1}, diff --git a/pkg/experiment/metastore/compaction/compactor/plan_test.go b/pkg/experiment/metastore/compaction/compactor/plan_test.go index 6af56c563c..be2a182fe3 100644 --- a/pkg/experiment/metastore/compaction/compactor/plan_test.go +++ b/pkg/experiment/metastore/compaction/compactor/plan_test.go @@ -6,7 +6,7 @@ import ( "github.com/stretchr/testify/assert" - "github.com/grafana/pyroscope/pkg/experiment/metastore/compaction/compactor/store" + "github.com/grafana/pyroscope/pkg/experiment/metastore/compaction" ) var testConfig = Config{ @@ -22,7 +22,7 @@ func TestPlan_same_level(t *testing.T) { c := NewCompactor(testConfig, nil, nil, nil) var i int // The index is used outside the loop. - for _, e := range []store.BlockEntry{ + for _, e := range []compaction.BlockEntry{ {Tenant: "A", Shard: 0, Level: 0}, {Tenant: "B", Shard: 2, Level: 0}, {Tenant: "A", Shard: 1, Level: 0}, @@ -83,7 +83,7 @@ func TestPlan_same_level(t *testing.T) { }, }...) 
- for _, e := range []store.BlockEntry{ + for _, e := range []compaction.BlockEntry{ {Tenant: "B", Shard: 2, Level: 0}, {Tenant: "A", Shard: 1, Level: 0}, // TA-S1-L0 is ready {Tenant: "B", Shard: 2, Level: 0}, // TB-S2-L0 @@ -107,7 +107,7 @@ func TestPlan_level_priority(t *testing.T) { // Lower level job should be planned first despite the arrival order. var i int - for _, e := range []store.BlockEntry{ + for _, e := range []compaction.BlockEntry{ {Tenant: "B", Shard: 2, Level: 1}, {Tenant: "A", Shard: 1, Level: 0}, {Tenant: "A", Shard: 1, Level: 0}, @@ -148,7 +148,7 @@ func TestPlan_empty_queue(t *testing.T) { p := &plan{compactor: c, blocks: newBlockIter()} assert.Nil(t, p.nextJob()) - c.enqueue(store.BlockEntry{ + c.enqueue(compaction.BlockEntry{ Index: 0, ID: "0", Tenant: "A", @@ -161,7 +161,7 @@ func TestPlan_empty_queue(t *testing.T) { p = &plan{compactor: c, blocks: newBlockIter()} assert.Nil(t, p.nextJob()) - c.enqueue(store.BlockEntry{ + c.enqueue(compaction.BlockEntry{ Index: 1, ID: "1", Tenant: "A", @@ -179,7 +179,7 @@ func TestPlan_deleted_blocks(t *testing.T) { c := NewCompactor(testConfig, nil, nil, nil) var i int // The index is used outside the loop. - for _, e := range []store.BlockEntry{ + for _, e := range []compaction.BlockEntry{ {Tenant: "A", Shard: 1, Level: 0}, {Tenant: "B", Shard: 2, Level: 0}, {Tenant: "A", Shard: 1, Level: 0}, @@ -223,7 +223,7 @@ func TestPlan_deleted_blocks(t *testing.T) { // Now we add some more blocks to make sure that the // invalidated queue can still be compacted. - for _, e := range []store.BlockEntry{ + for _, e := range []compaction.BlockEntry{ {Tenant: "A", Shard: 1, Level: 0}, {Tenant: "A", Shard: 1, Level: 0}, {Tenant: "A", Shard: 1, Level: 0}, @@ -253,7 +253,7 @@ func TestPlan_deleted_blocks(t *testing.T) { func TestPlan_deleted_batch(t *testing.T) { c := NewCompactor(testConfig, nil, nil, nil) - for i, e := range []store.BlockEntry{{}, {}, {}} { + for i, e := range []compaction.BlockEntry{{}, {}, {}} { e.Index = uint64(i) e.ID = strconv.Itoa(i) c.enqueue(e) diff --git a/pkg/experiment/metastore/compaction/compactor/store/block_queue_store.go b/pkg/experiment/metastore/compaction/compactor/store/block_queue_store.go index d878a9e7df..893a3907a1 100644 --- a/pkg/experiment/metastore/compaction/compactor/store/block_queue_store.go +++ b/pkg/experiment/metastore/compaction/compactor/store/block_queue_store.go @@ -6,6 +6,7 @@ import ( "go.etcd.io/bbolt" + "github.com/grafana/pyroscope/pkg/experiment/metastore/compaction" "github.com/grafana/pyroscope/pkg/experiment/metastore/store" "github.com/grafana/pyroscope/pkg/iter" ) @@ -14,18 +15,6 @@ var ErrInvalidBlockEntry = errors.New("invalid block entry") var blockQueueBucketName = []byte("compaction_block_queue") -// BlockEntry represents a block metadata entry in the compaction block queue. -type BlockEntry struct { - // Key. Ensures uniqueness and order. - Index uint64 - ID string - // Value. Needed to place the entry in the right queue. - AppendedAt int64 - Level uint32 - Shard uint32 - Tenant string -} - // BlockQueueStore provides methods to store and retrieve block queues. // The store is optimized for two cases: load the entire queue (preserving // the original order) and remove an entry from the queue. 
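// On disk, each entry is a single key-value pair: the key is the raft log
// index followed by the block ID (keeping entries unique and ordered), and
// the value packs AppendedAt, the compaction level, the shard, and the
// tenant name.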
@@ -48,7 +37,7 @@ func (s BlockQueueStore) CreateBuckets(tx *bbolt.Tx) error { return err } -func (s BlockQueueStore) StoreEntry(tx *bbolt.Tx, entry BlockEntry) error { +func (s BlockQueueStore) StoreEntry(tx *bbolt.Tx, entry compaction.BlockEntry) error { e := marshalBlockEntry(entry) return tx.Bucket(s.bucketName).Put(e.Key, e.Value) } @@ -57,13 +46,13 @@ func (s BlockQueueStore) DeleteEntry(tx *bbolt.Tx, index uint64, id string) erro return tx.Bucket(s.bucketName).Delete(marshalBlockEntryKey(index, id)) } -func (s BlockQueueStore) ListEntries(tx *bbolt.Tx) iter.Iterator[BlockEntry] { +func (s BlockQueueStore) ListEntries(tx *bbolt.Tx) iter.Iterator[compaction.BlockEntry] { return newBlockEntriesIterator(tx.Bucket(s.bucketName)) } type blockEntriesIterator struct { iter *store.CursorIterator - cur BlockEntry + cur compaction.BlockEntry err error } @@ -79,7 +68,7 @@ func (x *blockEntriesIterator) Next() bool { return x.err == nil } -func (x *blockEntriesIterator) At() BlockEntry { return x.cur } +func (x *blockEntriesIterator) At() compaction.BlockEntry { return x.cur } func (x *blockEntriesIterator) Close() error { return x.iter.Close() } @@ -90,7 +79,7 @@ func (x *blockEntriesIterator) Err() error { return x.err } -func marshalBlockEntry(e BlockEntry) store.KV { +func marshalBlockEntry(e compaction.BlockEntry) store.KV { k := marshalBlockEntryKey(e.Index, e.ID) b := make([]byte, 8+4+4+len(e.Tenant)) binary.BigEndian.PutUint64(b[0:8], uint64(e.AppendedAt)) @@ -107,7 +96,7 @@ func marshalBlockEntryKey(index uint64, id string) []byte { return b } -func unmarshalBlockEntry(dst *BlockEntry, e store.KV) error { +func unmarshalBlockEntry(dst *compaction.BlockEntry, e store.KV) error { if len(e.Key) < 8 || len(e.Value) < 16 { return ErrInvalidBlockEntry } diff --git a/pkg/experiment/metastore/compaction/compactor/store/block_queue_store_test.go b/pkg/experiment/metastore/compaction/compactor/store/block_queue_store_test.go index b8e33d7e8b..a5ed85de94 100644 --- a/pkg/experiment/metastore/compaction/compactor/store/block_queue_store_test.go +++ b/pkg/experiment/metastore/compaction/compactor/store/block_queue_store_test.go @@ -8,6 +8,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/grafana/pyroscope/pkg/experiment/metastore/compaction" "github.com/grafana/pyroscope/pkg/test" ) @@ -19,9 +20,9 @@ func TestBlockQueueStore_StoreEntry(t *testing.T) { require.NoError(t, err) require.NoError(t, s.CreateBuckets(tx)) - entries := make([]BlockEntry, 1000) + entries := make([]compaction.BlockEntry, 1000) for i := range entries { - entries[i] = BlockEntry{ + entries[i] = compaction.BlockEntry{ Index: uint64(i), ID: strconv.Itoa(i), AppendedAt: time.Now().UnixNano(), @@ -58,9 +59,9 @@ func TestBlockQueueStore_DeleteEntry(t *testing.T) { require.NoError(t, err) require.NoError(t, s.CreateBuckets(tx)) - entries := make([]BlockEntry, 1000) + entries := make([]compaction.BlockEntry, 1000) for i := range entries { - entries[i] = BlockEntry{ + entries[i] = compaction.BlockEntry{ Index: uint64(i), ID: strconv.Itoa(i), AppendedAt: time.Now().UnixNano(), diff --git a/pkg/experiment/metastore/compaction/scheduler/scheduler.go b/pkg/experiment/metastore/compaction/scheduler/scheduler.go index e1206ef58d..5de49cb587 100644 --- a/pkg/experiment/metastore/compaction/scheduler/scheduler.go +++ b/pkg/experiment/metastore/compaction/scheduler/scheduler.go @@ -91,7 +91,7 @@ func (sc *Scheduler) NewSchedule(tx *bbolt.Tx, cmd *raft.Log) compaction.Schedul } } -func (sc 
*Scheduler) UpdateSchedule(tx *bbolt.Tx, _ *raft.Log, update *raft_log.CompactionPlanUpdate) error { +func (sc *Scheduler) UpdateSchedule(tx *bbolt.Tx, update *raft_log.CompactionPlanUpdate) error { sc.mu.Lock() defer sc.mu.Unlock() diff --git a/pkg/experiment/metastore/compaction/scheduler/scheduler_test.go b/pkg/experiment/metastore/compaction/scheduler/scheduler_test.go index c104eff0ed..6bf5f9ed2a 100644 --- a/pkg/experiment/metastore/compaction/scheduler/scheduler_test.go +++ b/pkg/experiment/metastore/compaction/scheduler/scheduler_test.go @@ -26,7 +26,7 @@ func TestScheduler_UpdateSchedule(t *testing.T) { scheduler.queue.put(&raft_log.CompactionJobState{Name: "2", Token: 1}) scheduler.queue.put(&raft_log.CompactionJobState{Name: "3", Token: 1}) - err := scheduler.UpdateSchedule(nil, &raft.Log{Index: 2}, &raft_log.CompactionPlanUpdate{ + err := scheduler.UpdateSchedule(nil, &raft_log.CompactionPlanUpdate{ NewJobs: []*raft_log.NewCompactionJob{{ State: &raft_log.CompactionJobState{Name: "1"}, Plan: &raft_log.CompactionJobPlan{Name: "1"}, diff --git a/pkg/experiment/metastore/compaction_raft_handler.go b/pkg/experiment/metastore/compaction_raft_handler.go index bd995b0903..33525f0d3d 100644 --- a/pkg/experiment/metastore/compaction_raft_handler.go +++ b/pkg/experiment/metastore/compaction_raft_handler.go @@ -53,7 +53,7 @@ func (h *CompactionCommandHandler) GetCompactionPlanUpdate( // We need to generate a plan of the update caused by the new status // report from the worker. The plan will be used to update the schedule // after the Raft consensus is reached. - planner := h.planner.NewPlan(tx, cmd) + planner := h.planner.NewPlan(cmd) scheduler := h.scheduler.NewSchedule(tx, cmd) p := new(raft_log.CompactionPlanUpdate) @@ -144,12 +144,12 @@ func (h *CompactionCommandHandler) UpdateCompactionPlan( return new(raft_log.UpdateCompactionPlanResponse), nil } - if err := h.planner.UpdatePlan(tx, cmd, req.PlanUpdate); err != nil { + if err := h.planner.UpdatePlan(tx, req.PlanUpdate); err != nil { level.Error(h.logger).Log("msg", "failed to update compaction planner", "err", err) return nil, err } - if err := h.scheduler.UpdateSchedule(tx, cmd, req.PlanUpdate); err != nil { + if err := h.scheduler.UpdateSchedule(tx, req.PlanUpdate); err != nil { level.Error(h.logger).Log("msg", "failed to update compaction schedule", "err", err) return nil, err } @@ -167,20 +167,20 @@ func (h *CompactionCommandHandler) UpdateCompactionPlan( level.Error(h.logger).Log("msg", "compacted blocks are missing", "job", job.State.Name) continue } - if err := h.index.ReplaceBlocks(tx, compacted); err != nil { - level.Error(h.logger).Log("msg", "failed to replace blocks", "err", err) - return nil, err - } if err := h.tombstones.AddTombstones(tx, cmd, blockTombstonesForCompletedJob(job)); err != nil { level.Error(h.logger).Log("msg", "failed to add tombstones", "err", err) return nil, err } for _, block := range compacted.NewBlocks { - if err := h.compactor.Compact(tx, cmd, block); err != nil { + if err := h.compactor.Compact(tx, compaction.NewBlockEntry(cmd, block)); err != nil { level.Error(h.logger).Log("msg", "failed to compact block", "err", err) return nil, err } } + if err := h.index.ReplaceBlocks(tx, compacted); err != nil { + level.Error(h.logger).Log("msg", "failed to replace blocks", "err", err) + return nil, err + } } return &raft_log.UpdateCompactionPlanResponse{PlanUpdate: req.PlanUpdate}, nil diff --git a/pkg/experiment/metastore/dlq/recovery.go b/pkg/experiment/metastore/dlq/recovery.go index 
604284587f..5150acded9 100644 --- a/pkg/experiment/metastore/dlq/recovery.go +++ b/pkg/experiment/metastore/dlq/recovery.go @@ -15,7 +15,7 @@ import ( "github.com/thanos-io/objstore" metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" - segmentstorage "github.com/grafana/pyroscope/pkg/experiment/ingester/storage" + "github.com/grafana/pyroscope/pkg/experiment/block" "github.com/grafana/pyroscope/pkg/experiment/metastore/raftnode" ) @@ -91,7 +91,7 @@ func (r *Recovery) recoverLoop(ctx context.Context) { } func (r *Recovery) recoverTick(ctx context.Context) { - err := r.bucket.Iter(ctx, segmentstorage.PathDLQ, func(metaPath string) error { + err := r.bucket.Iter(ctx, block.DirNameDLQ, func(metaPath string) error { if ctx.Err() != nil { return ctx.Err() } diff --git a/pkg/experiment/metastore/dlq/recovery_test.go b/pkg/experiment/metastore/dlq/recovery_test.go index a9db5ed789..61f0215f24 100644 --- a/pkg/experiment/metastore/dlq/recovery_test.go +++ b/pkg/experiment/metastore/dlq/recovery_test.go @@ -2,12 +2,10 @@ package dlq import ( "context" - "crypto/rand" "sync" "testing" "time" - "github.com/oklog/ulid" "github.com/prometheus/prometheus/util/testutil" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" @@ -16,7 +14,7 @@ import ( "google.golang.org/grpc/status" metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" - segmentstorage "github.com/grafana/pyroscope/pkg/experiment/ingester/storage" + "github.com/grafana/pyroscope/pkg/experiment/block" "github.com/grafana/pyroscope/pkg/experiment/metastore/raftnode/raftnodepb" "github.com/grafana/pyroscope/pkg/objstore/providers/memory" "github.com/grafana/pyroscope/pkg/test/mocks/mockdlq" @@ -25,20 +23,20 @@ import ( func TestRecoverTick(t *testing.T) { metas := []*metastorev1.BlockMeta{ { - Id: ulid.MustNew(3, rand.Reader).String(), + Id: "3", Shard: 2, }, { - Id: ulid.MustNew(1, rand.Reader).String(), + Id: "1", Shard: 1, }, { - Id: ulid.MustNew(2, rand.Reader).String(), + Id: "2", Shard: 2, }, } - actual := []*metastorev1.BlockMeta{} + var actual []*metastorev1.BlockMeta srv := mockdlq.NewMockLocalServer(t) srv.On("AddRecoveredBlock", mock.Anything, mock.Anything). Times(3). @@ -72,7 +70,7 @@ func TestRecoverTick(t *testing.T) { func TestNotRaftLeader(t *testing.T) { metas := []*metastorev1.BlockMeta{ { - Id: ulid.MustNew(3, rand.Reader).String(), + Id: "1", Shard: 2, }, } @@ -100,21 +98,21 @@ func TestNotRaftLeader(t *testing.T) { func TestStartStop(t *testing.T) { metas := []*metastorev1.BlockMeta{ { - Id: ulid.MustNew(3, rand.Reader).String(), + Id: "3", Shard: 2, }, { - Id: ulid.MustNew(1, rand.Reader).String(), + Id: "1", Shard: 1, }, { - Id: ulid.MustNew(2, rand.Reader).String(), + Id: "2", Shard: 2, }, } m := new(sync.Mutex) - actual := []*metastorev1.BlockMeta{} + var actual []*metastorev1.BlockMeta srv := mockdlq.NewMockLocalServer(t) srv.On("AddRecoveredBlock", mock.Anything, mock.Anything). Times(3). 
@@ -156,5 +154,5 @@ func TestStartStop(t *testing.T) { func addMeta(bucket *memory.InMemBucket, meta *metastorev1.BlockMeta) { data, _ := meta.MarshalVT() - bucket.Set(segmentstorage.PathForDLQ(meta), data) + bucket.Set(block.MetadataDLQObjectPath(meta), data) } diff --git a/pkg/experiment/metastore/fsm/boltdb.go b/pkg/experiment/metastore/fsm/boltdb.go index 136e926ee6..84e8438f41 100644 --- a/pkg/experiment/metastore/fsm/boltdb.go +++ b/pkg/experiment/metastore/fsm/boltdb.go @@ -16,7 +16,10 @@ import ( const ( boltDBFileName = "metastore.boltdb" boltDBSnapshotName = "metastore_snapshot.boltdb" + boltDBCompactedName = "metastore_compacted.boltdb" boltDBInitialMmapSize = 1 << 30 + + boltDBCompactionMaxTxnSize = 64 << 10 ) type boltdb struct { @@ -65,7 +68,11 @@ func (db *boltdb) open(readOnly bool) (err error) { // open is called with readOnly=true to verify the snapshot integrity. opts.ReadOnly = readOnly opts.PreLoadFreelist = !readOnly - opts.InitialMmapSize = boltDBInitialMmapSize + if !readOnly { + // If we open the DB for restoration/compaction, we don't need + // a large mmap size as no writes are performed. + opts.InitialMmapSize = boltDBInitialMmapSize + } // Because of the nature of the metastore, we do not need to sync // the database: the state is always restored from the snapshot. opts.NoSync = true @@ -82,10 +89,10 @@ func (db *boltdb) open(readOnly bool) (err error) { func (db *boltdb) shutdown() { if db.boltdb != nil { if err := db.boltdb.Sync(); err != nil { - _ = level.Error(db.logger).Log("msg", "failed to sync database", "err", err) + level.Error(db.logger).Log("msg", "failed to sync database", "err", err) } if err := db.boltdb.Close(); err != nil { - _ = level.Error(db.logger).Log("msg", "failed to close database", "err", err) + level.Error(db.logger).Log("msg", "failed to close database", "err", err) } } } @@ -96,22 +103,38 @@ func (db *boltdb) restore(snapshot io.Reader) error { db.metrics.boltDBRestoreSnapshotDuration.Observe(time.Since(start).Seconds()) }() // Snapshot is a full copy of the database, therefore we copy - // it on disk and use it instead of the current database. + // it on disk, compact, and use it instead of the current database. + // Compacting the snapshot is necessary to reclaim the space + // that the source database no longer has use for. This is rather + // wasteful to do it at restoration time, but it helps to reduce + // the footprint and latencies caused by the snapshot capturing. + // Ideally, this should be done in the background, outside the + // snapshot-restore path; however, this will require broad locks + // and will impact the transactions. path, err := db.copySnapshot(snapshot) if err == nil { - // First check the snapshot. - restored := *db - restored.path = path - err = restored.open(true) - // Also check applied index. + restored := &boltdb{ + logger: db.logger, + metrics: db.metrics, + dir: db.dir, + path: path, + } + // Open in Read-Only mode. + if err = restored.open(true); err == nil { + var compacted *boltdb + if compacted, err = restored.compact(); err == nil { + path = compacted.path + } + } restored.shutdown() + err = os.RemoveAll(restored.path) } if err != nil { return fmt.Errorf("failed to restore snapshot: %w", err) } // Note that we do not keep the previous database: in case if the // snapshot is corrupted, we should try another one. 
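The restore path above copies the Raft snapshot to disk and then runs it through bbolt's offline compaction before swapping it in, reclaiming free pages the source database still carried. For readers unfamiliar with that API, here is a minimal, self-contained sketch of the same copy-and-compact pattern; the helper name and file paths are illustrative only, and the 64 KiB transaction limit mirrors boltDBCompactionMaxTxnSize above.

    package main

    import (
        "fmt"
        "log"

        bolt "go.etcd.io/bbolt"
    )

    // compactCopy streams every bucket from the database at srcPath into a
    // fresh database at dstPath, dropping unused pages along the way.
    func compactCopy(srcPath, dstPath string) error {
        src, err := bolt.Open(srcPath, 0644, &bolt.Options{ReadOnly: true})
        if err != nil {
            return fmt.Errorf("open source: %w", err)
        }
        defer src.Close()

        dst, err := bolt.Open(dstPath, 0644, nil)
        if err != nil {
            return fmt.Errorf("open destination: %w", err)
        }
        defer dst.Close()

        // Copy buckets in bounded transactions, up to 64 KiB of data per commit.
        return bolt.Compact(dst, src, 64<<10)
    }

    func main() {
        if err := compactCopy("metastore_snapshot.boltdb", "metastore_compacted.boltdb"); err != nil {
            log.Fatal(err)
        }
    }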
- return db.openSnapshot(path) + return db.openPath(path) } func (db *boltdb) copySnapshot(snapshot io.Reader) (path string, err error) { @@ -127,7 +150,36 @@ func (db *boltdb) copySnapshot(snapshot io.Reader) (path string, err error) { return path, err } -func (db *boltdb) openSnapshot(path string) (err error) { +func (db *boltdb) compact() (compacted *boltdb, err error) { + level.Info(db.logger).Log("msg", "compacting snapshot") + src := db.boltdb + compacted = &boltdb{ + logger: db.logger, + metrics: db.metrics, + dir: db.dir, + path: filepath.Join(db.dir, boltDBCompactedName), + } + if err = compacted.open(false); err != nil { + return nil, fmt.Errorf("failed to create db for compaction: %w", err) + } + defer compacted.shutdown() + dst := compacted.boltdb + if err = bbolt.Compact(dst, src, boltDBCompactionMaxTxnSize); err != nil { + return nil, fmt.Errorf("failed to compact db: %w", err) + } + level.Info(db.logger).Log("msg", "boltdb compaction ratio", "ratio", float64(compacted.size())/float64(db.size())) + return compacted, nil +} + +func (db *boltdb) size() int64 { + fi, err := os.Stat(db.path) + if err != nil { + return 0 + } + return fi.Size() +} + +func (db *boltdb) openPath(path string) (err error) { db.shutdown() if err = os.Rename(path, db.path); err != nil { return err diff --git a/pkg/experiment/metastore/fsm/boltdb_test.go b/pkg/experiment/metastore/fsm/boltdb_test.go new file mode 100644 index 0000000000..166d836d87 --- /dev/null +++ b/pkg/experiment/metastore/fsm/boltdb_test.go @@ -0,0 +1,67 @@ +package fsm + +import ( + "bytes" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.etcd.io/bbolt" + + "github.com/grafana/pyroscope/pkg/util" +) + +func TestBoltDB_open_restore(t *testing.T) { + tempDir := t.TempDir() + db := newDB(util.Logger, newMetrics(nil), tempDir) + require.NoError(t, db.open(false)) + + data := []string{ + "k1", "v1", + "k2", "v2", + "k3", "v3", + } + + snapshotSource := filepath.Join(tempDir, "snapshot_source") + require.NoError(t, createDB(t, snapshotSource, data).Close()) + s, err := os.ReadFile(snapshotSource) + require.NoError(t, err) + require.NoError(t, db.restore(bytes.NewReader(s))) + + collected := make([]string, 0, len(data)) + require.NoError(t, db.boltdb.View(func(tx *bbolt.Tx) error { + b := tx.Bucket([]byte("test")) + assert.NotNil(t, b) + return b.ForEach(func(k, v []byte) error { + collected = append(collected, string(k), string(v)) + return nil + }) + })) + + assert.Equal(t, data, collected) +} + +func createDB(t *testing.T, path string, pairs []string) *bbolt.DB { + opts := bbolt.Options{ + NoGrowSync: true, + NoFreelistSync: true, + NoSync: true, + FreelistType: bbolt.FreelistMapType, + } + db, err := bbolt.Open(path, 0644, &opts) + require.NoError(t, err) + require.NoError(t, db.Update(func(tx *bbolt.Tx) error { + bucket, err := tx.CreateBucketIfNotExists([]byte("test")) + require.NoError(t, err) + for len(pairs) > 1 { + if err = bucket.Put([]byte(pairs[0]), []byte(pairs[1])); err != nil { + return err + } + pairs = pairs[2:] + } + return nil + })) + return db +} diff --git a/pkg/experiment/metastore/fsm/metrics.go b/pkg/experiment/metastore/fsm/metrics.go index 7c534ab0b1..5c3805eee7 100644 --- a/pkg/experiment/metastore/fsm/metrics.go +++ b/pkg/experiment/metastore/fsm/metrics.go @@ -10,7 +10,7 @@ import ( type metrics struct { boltDBPersistSnapshotDuration prometheus.Histogram - boltDBPersistSnapshotSize prometheus.Histogram + boltDBPersistSnapshotSize 
prometheus.Gauge boltDBRestoreSnapshotDuration prometheus.Histogram fsmRestoreSnapshotDuration prometheus.Histogram fsmApplyCommandSize *prometheus.HistogramVec @@ -28,11 +28,8 @@ func newMetrics(reg prometheus.Registerer) *metrics { NativeHistogramMinResetDuration: time.Hour, }), - boltDBPersistSnapshotSize: prometheus.NewHistogram(prometheus.HistogramOpts{ - Name: "boltdb_persist_snapshot_size_bytes", - NativeHistogramBucketFactor: 1.1, - NativeHistogramMaxBucketNumber: 100, - NativeHistogramMinResetDuration: time.Hour, + boltDBPersistSnapshotSize: prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "boltdb_persist_snapshot_size_bytes", }), boltDBRestoreSnapshotDuration: prometheus.NewHistogram(prometheus.HistogramOpts{ diff --git a/pkg/experiment/metastore/fsm/snapshot.go b/pkg/experiment/metastore/fsm/snapshot.go index 89ba8bf654..d572b38c54 100644 --- a/pkg/experiment/metastore/fsm/snapshot.go +++ b/pkg/experiment/metastore/fsm/snapshot.go @@ -42,7 +42,7 @@ func (s *snapshot) Persist(sink raft.SnapshotSink) (err error) { level.Info(s.logger).Log("msg", "persisting snapshot") var n int64 n, err = s.tx.WriteTo(sink) - s.metrics.boltDBPersistSnapshotSize.Observe(float64(n)) + s.metrics.boltDBPersistSnapshotSize.Set(float64(n)) if err != nil { level.Error(s.logger).Log("msg", "failed to write snapshot", "err", err) } diff --git a/pkg/experiment/metastore/index/index.go b/pkg/experiment/metastore/index/index.go index 90dd36becf..c2a9dc2c12 100644 --- a/pkg/experiment/metastore/index/index.go +++ b/pkg/experiment/metastore/index/index.go @@ -1,9 +1,10 @@ package index import ( - "context" + "errors" "flag" "fmt" + "math" "slices" "strings" "sync" @@ -13,67 +14,81 @@ import ( "github.com/go-kit/log/level" "github.com/oklog/ulid" "go.etcd.io/bbolt" - "golang.org/x/sync/errgroup" metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" + "github.com/grafana/pyroscope/pkg/experiment/block" "github.com/grafana/pyroscope/pkg/experiment/metastore/index/store" + "github.com/grafana/pyroscope/pkg/iter" +) + +const ( + partitionDuration = 6 * time.Hour + // Indicates that partitions within this window are "protected" from being unloaded. + partitionProtectionWindow = 30 * time.Minute + partitionTenantCacheSize = 32 ) var ErrBlockExists = fmt.Errorf("block already exists") +var DefaultConfig = Config{ + PartitionDuration: partitionDuration, + QueryLookaroundPeriod: partitionDuration, + PartitionCacheSize: partitionTenantCacheSize, +} + +type Config struct { + PartitionCacheSize int `yaml:"partition_cache_size"` + PartitionDuration time.Duration + QueryLookaroundPeriod time.Duration `yaml:"query_lookaround_period"` +} + +func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { + // FIXME(kolesnikovae): This parameter is not fully supported. + // Overlapping partitions are difficult to handle correctly; + // without an interval tree, it may also be inefficient. + // Instead, we should consider immutable partition ranges: + // once a partition is created, all the keys targeting the + // time range of the partition should be directed to it. 
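The new index configuration above partitions block metadata by the creation time encoded in each block's ULID, bucketed into fixed windows (6 hours by default). As a hedged illustration of that bucketing, assuming nothing about the internal store.NewPartitionKey encoding, a block's partition window can be derived roughly like this:

    package main

    import (
        "crypto/rand"
        "fmt"
        "time"

        "github.com/oklog/ulid"
    )

    // partitionStart returns the start of the fixed-duration window the block
    // falls into, based on the timestamp embedded in its ULID. The 6h duration
    // matches DefaultConfig.PartitionDuration; the real key encoding used by
    // the store is not reproduced here.
    func partitionStart(blockID string, d time.Duration) time.Time {
        ms := ulid.MustParse(blockID).Time() // Unix milliseconds from the ULID
        return ulid.Time(ms).UTC().Truncate(d)
    }

    func main() {
        id := ulid.MustNew(ulid.Now(), rand.Reader)
        fmt.Println(partitionStart(id.String(), 6*time.Hour))
    }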
+ cfg.PartitionDuration = DefaultConfig.PartitionDuration + f.IntVar(&cfg.PartitionCacheSize, prefix+"partition-cache-size", DefaultConfig.PartitionCacheSize, "How many partitions to keep loaded in memory per tenant.") + f.DurationVar(&cfg.QueryLookaroundPeriod, prefix+"query-lookaround-period", DefaultConfig.QueryLookaroundPeriod, "") +} + type Store interface { CreateBuckets(*bbolt.Tx) error - StoreBlock(*bbolt.Tx, store.PartitionKey, *metastorev1.BlockMeta) error + + StoreBlock(tx *bbolt.Tx, shard *store.TenantShard, md *metastorev1.BlockMeta) error DeleteBlockList(*bbolt.Tx, store.PartitionKey, *metastorev1.BlockList) error - ListPartitions(*bbolt.Tx) []store.PartitionKey - ListShards(*bbolt.Tx, store.PartitionKey) []uint32 - ListTenants(tx *bbolt.Tx, p store.PartitionKey, shard uint32) []string - ListBlocks(tx *bbolt.Tx, p store.PartitionKey, shard uint32, tenant string) []*metastorev1.BlockMeta + ListPartitions(*bbolt.Tx) ([]*store.Partition, error) + LoadTenantShard(*bbolt.Tx, store.PartitionKey, string, uint32) (*store.TenantShard, error) } type Index struct { + logger log.Logger config *Config - - partitionMu sync.Mutex - loadedPartitions map[cacheKey]*indexPartition - allPartitions []*PartitionMeta - store Store - logger log.Logger -} - -type Config struct { - PartitionDuration time.Duration `yaml:"partition_duration"` - PartitionCacheSize int `yaml:"partition_cache_size"` - QueryLookaroundPeriod time.Duration `yaml:"query_lookaround_period"` -} -func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { - f.DurationVar(&cfg.PartitionDuration, prefix+"partition-duration", DefaultConfig.PartitionDuration, "") - f.IntVar(&cfg.PartitionCacheSize, prefix+"partition-cache-size", DefaultConfig.PartitionCacheSize, "How many partitions to keep loaded in memory.") - f.DurationVar(&cfg.QueryLookaroundPeriod, prefix+"query-lookaround-period", DefaultConfig.QueryLookaroundPeriod, "") + mu sync.Mutex + loadedPartitions map[tenantPartitionKey]*indexPartition + partitions []*store.Partition } -var DefaultConfig = Config{ - PartitionDuration: 24 * time.Hour, - PartitionCacheSize: 7, - QueryLookaroundPeriod: time.Hour, +type tenantPartitionKey struct { + partition store.PartitionKey + tenant string } type indexPartition struct { - meta *PartitionMeta - accessedAt time.Time + partition *store.Partition + tenant string shards map[uint32]*indexShard + accessedAt time.Time } type indexShard struct { blocks map[string]*metastorev1.BlockMeta -} - -type cacheKey struct { - partitionKey store.PartitionKey - tenant string + *store.TenantShard } // NewIndex initializes a new metastore index. @@ -87,16 +102,16 @@ type cacheKey struct { // // The index requires a backing Store for loading data in memory. Data is loaded directly via LoadPartitions() or when // looking up blocks with FindBlock() or FindBlocksInRange(). -func NewIndex(logger log.Logger, store Store, cfg *Config) *Index { +func NewIndex(logger log.Logger, s Store, cfg *Config) *Index { // A fixed cache size gives us bounded memory footprint, however changes to the partition duration could reduce // the cache effectiveness. 
// TODO (aleks-p): // - resize the cache at runtime when the config changes // - consider auto-calculating the cache size to ensure we hold data for e.g., the last 24 hours return &Index{ - loadedPartitions: make(map[cacheKey]*indexPartition, cfg.PartitionCacheSize), - allPartitions: make([]*PartitionMeta, 0), - store: store, + loadedPartitions: make(map[tenantPartitionKey]*indexPartition, cfg.PartitionCacheSize), + partitions: make([]*store.Partition, 0), + store: s, logger: logger, config: cfg, } @@ -106,484 +121,248 @@ func NewStore() *store.IndexStore { return store.NewIndexStore() } -// LoadPartitions reads all partitions from the backing store and loads the recent ones in memory. -func (i *Index) LoadPartitions(tx *bbolt.Tx) { - i.partitionMu.Lock() - defer i.partitionMu.Unlock() - - i.allPartitions = i.allPartitions[:0] - clear(i.loadedPartitions) - for _, key := range i.store.ListPartitions(tx) { - pMeta := i.loadPartitionMeta(tx, key) - level.Info(i.logger).Log( - "msg", "loaded metastore index partition", - "key", key, - "ts", pMeta.Ts.Format(time.RFC3339), - "duration", pMeta.Duration, - "tenants", strings.Join(pMeta.Tenants, ",")) - i.allPartitions = append(i.allPartitions, pMeta) - - // load the currently active partition - if pMeta.contains(time.Now().UTC().UnixMilli()) { - i.loadEntirePartition(tx, pMeta) - } - } - level.Info(i.logger).Log("msg", "loaded metastore index partitions", "count", len(i.allPartitions)) - - i.sortPartitions() +func (i *Index) Init(tx *bbolt.Tx) error { + return i.store.CreateBuckets(tx) } -func (i *Index) loadPartitionMeta(tx *bbolt.Tx, key store.PartitionKey) *PartitionMeta { - t, dur, _ := key.Parse() - pMeta := &PartitionMeta{ - Key: key, - Ts: t, - Duration: dur, - Tenants: make([]string, 0), - tenantMap: make(map[string]struct{}), - } - for _, s := range i.store.ListShards(tx, key) { - for _, t := range i.store.ListTenants(tx, key, s) { - pMeta.AddTenant(t) - } - } - return pMeta -} +func (i *Index) Restore(tx *bbolt.Tx) error { + i.mu.Lock() + defer i.mu.Unlock() -// ForEachPartition executes the given function concurrently for each partition. It will be called for all partitions, -// regardless if they are fully loaded in memory or not. 
-func (i *Index) ForEachPartition(ctx context.Context, fn func(meta *PartitionMeta) error) error { - i.partitionMu.Lock() - defer i.partitionMu.Unlock() + i.partitions = nil + clear(i.loadedPartitions) - g, ctx := errgroup.WithContext(ctx) - for _, meta := range i.allPartitions { - g.Go(func() error { - return fn(meta) - }) - } - err := g.Wait() + var err error + i.partitions, err = i.store.ListPartitions(tx) if err != nil { - level.Error(i.logger).Log("msg", "error during partition iteration", "err", err) return err } - return nil -} -func (i *Index) loadEntirePartition(tx *bbolt.Tx, meta *PartitionMeta) { - for _, s := range i.store.ListShards(tx, meta.Key) { - for _, t := range i.store.ListTenants(tx, meta.Key, s) { - cKey := cacheKey{ - partitionKey: meta.Key, - tenant: t, - } - p, ok := i.loadedPartitions[cKey] - if !ok { - p = &indexPartition{ - meta: meta, - accessedAt: time.Now(), - shards: make(map[uint32]*indexShard), - } - i.loadedPartitions[cKey] = p - } - sh, ok := p.shards[s] - if !ok { - sh = &indexShard{ - blocks: make(map[string]*metastorev1.BlockMeta), - } - p.shards[s] = sh - } - for _, b := range i.store.ListBlocks(tx, meta.Key, s, t) { - sh.blocks[b.Id] = b + for _, p := range i.partitions { + level.Info(i.logger).Log( + "msg", "found metastore index partition", + "timestamp", p.Key.Timestamp.Format(time.RFC3339), + "duration", p.Key.Duration, + "tenants", len(p.TenantShards), + ) + if i.shouldKeepPartition(p) { + level.Info(i.logger).Log("msg", "loading partition in memory") + if err = i.loadPartition(tx, p); err != nil { + return err } } } + + level.Info(i.logger).Log("msg", "loaded metastore index partitions", "count", len(i.partitions)) + i.sortPartitions() + return nil } -func (i *Index) getOrLoadPartition(tx *bbolt.Tx, meta *PartitionMeta, tenant string) *indexPartition { - cKey := cacheKey{ - partitionKey: meta.Key, - tenant: tenant, - } - p, ok := i.loadedPartitions[cKey] - if !ok { - p = &indexPartition{ - meta: meta, - shards: make(map[uint32]*indexShard), - } - for _, s := range i.store.ListShards(tx, meta.Key) { - sh := &indexShard{ - blocks: make(map[string]*metastorev1.BlockMeta), - } - p.shards[s] = sh - for _, b := range i.store.ListBlocks(tx, meta.Key, s, tenant) { - sh.blocks[b.Id] = b - } - } - i.loadedPartitions[cKey] = p - } - p.accessedAt = time.Now().UTC() - i.unloadPartitions() - return p +func (i *Index) shouldKeepPartition(p *store.Partition) bool { + now := time.Now() + low := now.Add(-partitionProtectionWindow) + high := now.Add(partitionProtectionWindow) + return p.Overlaps(low, high) } -// findPartitionMeta retrieves the partition meta for the given key. 
-func (i *Index) findPartitionMeta(key store.PartitionKey) *PartitionMeta { - for _, p := range i.allPartitions { - if p.Key == key { - return p +func (i *Index) sortPartitions() { + slices.SortFunc(i.partitions, func(a, b *store.Partition) int { + return a.Compare(b) + }) +} + +func (i *Index) loadPartition(tx *bbolt.Tx, p *store.Partition) error { + for tenant, shards := range p.TenantShards { + partition := newIndexPartition(p, tenant) + k := tenantPartitionKey{partition: p.Key, tenant: tenant} + i.loadedPartitions[k] = partition + for shard := range shards { + s, err := i.store.LoadTenantShard(tx, p.Key, tenant, shard) + if err != nil { + level.Error(i.logger).Log("msg", "failed to load shard", "partition", p.Key.Timestamp, "shard", shard, "tenant", tenant) + return err + } + partition.shards[shard] = newIndexShard(s) } } return nil } func (i *Index) InsertBlock(tx *bbolt.Tx, b *metastorev1.BlockMeta) error { - i.partitionMu.Lock() - defer i.partitionMu.Unlock() - if x := i.findBlock(tx, b.Shard, b.TenantId, b.Id); x != nil { - return ErrBlockExists - } - i.insertBlock(tx, b) - pk := store.CreatePartitionKey(b.Id, i.config.PartitionDuration) - return i.store.StoreBlock(tx, pk, b) -} - -func (i *Index) InsertBlockNoCheckNoPersist(tx *bbolt.Tx, b *metastorev1.BlockMeta) error { - i.partitionMu.Lock() - defer i.partitionMu.Unlock() - i.insertBlock(tx, b) - return nil + i.mu.Lock() + defer i.mu.Unlock() + return i.insertBlock(tx, b) } // insertBlock is the underlying implementation for inserting blocks. It is the caller's responsibility to enforce safe // concurrent access. The method will create a new partition if needed. -func (i *Index) insertBlock(tx *bbolt.Tx, b *metastorev1.BlockMeta) { - meta := i.getOrCreatePartitionMeta(b) - p := i.getOrLoadPartition(tx, meta, b.TenantId) - s, ok := p.shards[b.Shard] - if !ok { - s = &indexShard{ - blocks: make(map[string]*metastorev1.BlockMeta), - } - p.shards[b.Shard] = s - } - _, ok = s.blocks[b.Id] - if !ok { - s.blocks[b.Id] = b +func (i *Index) insertBlock(tx *bbolt.Tx, b *metastorev1.BlockMeta) error { + p := i.getOrCreatePartition(b) + shard, err := i.getOrCreateTenantShard(tx, p, block.Tenant(b), b.Shard) + if err != nil { + return err } + return shard.insert(tx, i.store, b) } -func (i *Index) getOrCreatePartitionMeta(b *metastorev1.BlockMeta) *PartitionMeta { - key := store.CreatePartitionKey(b.Id, i.config.PartitionDuration) - meta := i.findPartitionMeta(key) - - if meta == nil { - ts, duration, _ := key.Parse() - meta = &PartitionMeta{ - Key: key, - Ts: ts, - Duration: duration, - Tenants: make([]string, 0), - tenantMap: make(map[string]struct{}), - } - i.allPartitions = append(i.allPartitions, meta) +func (i *Index) getOrCreatePartition(b *metastorev1.BlockMeta) *store.Partition { + t := ulid.Time(ulid.MustParse(b.Id).Time()) + k := store.NewPartitionKey(t, i.config.PartitionDuration) + p := i.findPartition(k) + if p == nil { + level.Debug(i.logger).Log("msg", "creating new metastore index partition", "key", k) + p = store.NewPartition(k) + i.partitions = append(i.partitions, p) i.sortPartitions() } + return p +} - if b.TenantId != "" { - meta.AddTenant(b.TenantId) - } else { - for _, ds := range b.Datasets { - meta.AddTenant(ds.TenantId) +func (i *Index) findPartition(key store.PartitionKey) *store.Partition { + for _, p := range i.partitions { + if p.Key.Equal(key) { + return p } } - - return meta + return nil } -func (i *Index) getOrCreatePartitionMetaForCacheKey(k cacheKey) *PartitionMeta { - meta := 
i.findPartitionMeta(k.partitionKey) - if meta == nil { - ts, duration, _ := k.partitionKey.Parse() - meta = &PartitionMeta{ - Key: k.partitionKey, - Ts: ts, - Duration: duration, - Tenants: make([]string, 0), - tenantMap: make(map[string]struct{}), +func (i *Index) getOrCreateTenantShard(tx *bbolt.Tx, p *store.Partition, tenant string, shard uint32) (*indexShard, error) { + k := tenantPartitionKey{partition: p.Key, tenant: tenant} + partition, ok := i.loadedPartitions[k] + if !ok { + i.unloadPartitions() + partition = newIndexPartition(p, tenant) + if err := partition.loadTenant(tx, i.store, tenant); err != nil { + return nil, err } - i.allPartitions = append(i.allPartitions, meta) - i.sortPartitions() + i.loadedPartitions[k] = partition } - return meta -} - -// FindBlock tries to retrieve an existing block from the index. It will load the corresponding partition if it is not -// already loaded. Returns nil if the block cannot be found. -func (i *Index) FindBlock(tx *bbolt.Tx, shardNum uint32, tenant string, blockId string) *metastorev1.BlockMeta { - i.partitionMu.Lock() - defer i.partitionMu.Unlock() - return i.findBlock(tx, shardNum, tenant, blockId) -} - -func (i *Index) FindBlocks(tx *bbolt.Tx, list *metastorev1.BlockList) []*metastorev1.BlockMeta { - i.partitionMu.Lock() - defer i.partitionMu.Unlock() - - pk := make(map[store.PartitionKey]struct{}) - left := make(map[string]struct{}) - for _, block := range list.Blocks { - pk[store.CreatePartitionKey(block, i.config.PartitionDuration)] = struct{}{} - left[block] = struct{}{} + s, ok := partition.shards[shard] + if !ok { + s = newIndexShard(&store.TenantShard{ + Partition: p.Key, + Tenant: tenant, + Shard: shard, + StringTable: block.NewMetadataStringTable(), + }) + partition.shards[shard] = s + // This is the only way we "remember" the tenant shard. 
+ p.AddTenantShard(tenant, shard) } + partition.accessedAt = time.Now() + return s, nil +} - found := make([]*metastorev1.BlockMeta, 0, len(list.Blocks)) - for k := range pk { - meta := i.findPartitionMeta(k) - if meta == nil { +func (i *Index) FindBlocks(tx *bbolt.Tx, list *metastorev1.BlockList) ([]*metastorev1.BlockMeta, error) { + i.mu.Lock() + defer i.mu.Unlock() + metas := make([]*metastorev1.BlockMeta, 0, len(list.Blocks)) + for k, partitioned := range i.partitionedList(list) { + p := i.findPartition(k) + if p == nil { continue } - p := i.getOrLoadPartition(tx, meta, list.Tenant) - s, _ := p.shards[list.Shard] + s, err := i.getOrLoadTenantShard(tx, p, partitioned.Tenant, partitioned.Shard) + if err != nil { + return nil, err + } if s == nil { continue } - for b := range left { - if block := s.blocks[b]; block != nil { - found = append(found, block) - delete(left, b) + for _, b := range partitioned.Blocks { + if md := s.getBlock(b); md != nil { + metas = append(metas, md) } } } - - return found + return metas, nil } -func (i *Index) findBlock(tx *bbolt.Tx, shardNum uint32, tenant string, blockId string) *metastorev1.BlockMeta { - key := store.CreatePartitionKey(blockId, i.config.PartitionDuration) - - // first try the currently mapped partition - b := i.findBlockInPartition(tx, key, shardNum, tenant, blockId) - if b != nil { - return b - } - - // try other partitions that could contain the block - t := ulid.Time(ulid.MustParse(blockId).Time()).UTC().UnixMilli() - for _, p := range i.allPartitions { - if p.contains(t) { - b := i.findBlockInPartition(tx, p.Key, shardNum, tenant, blockId) - if b != nil { - return b +func (i *Index) partitionedList(list *metastorev1.BlockList) map[store.PartitionKey]*metastorev1.BlockList { + partitions := make(map[store.PartitionKey]*metastorev1.BlockList) + for _, b := range list.Blocks { + k := store.NewPartitionKey(ulid.Time(ulid.MustParse(b).Time()), i.config.PartitionDuration) + v := partitions[k] + if v == nil { + v = &metastorev1.BlockList{ + Shard: list.Shard, + Tenant: list.Tenant, + Blocks: make([]string, 0, len(list.Blocks)), } + partitions[k] = v } + v.Blocks = append(v.Blocks, b) } - return nil + return partitions } -func (i *Index) findBlockInPartition(tx *bbolt.Tx, key store.PartitionKey, shard uint32, tenant string, blockId string) *metastorev1.BlockMeta { - meta := i.findPartitionMeta(key) - if meta == nil { - return nil +func (i *Index) getOrLoadTenantShard(tx *bbolt.Tx, p *store.Partition, tenant string, shard uint32) (*indexShard, error) { + // Check if we've seen any data for the tenant shard at all. + shards, ok := p.TenantShards[tenant] + if !ok { + return nil, nil } - - p := i.getOrLoadPartition(tx, meta, tenant) - - s, _ := p.shards[shard] - if s == nil { - return nil + if _, ok = shards[shard]; !ok { + return nil, nil } - - b, _ := s.blocks[blockId] - - return b -} - -// FindBlocksInRange retrieves all blocks that might contain data for the given time range and tenants. -// -// It is not enough to scan for partition keys that fall in the given time interval. Partitions are built on top of -// block identifiers which refer to the moment a block was created and not to the timestamps of the profiles contained -// within the block (min_time, max_time). This method works around this by including blocks from adjacent partitions. 
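The removed doc comment above explains why range queries cannot simply scan partitions whose keys fall inside the query window: partitions are keyed by block creation time, while the profiles inside a block may be older or newer, so the window is widened by the lookaround period and candidates are kept by an overlap check on their own min/max times. A small sketch of that check with made-up values; the predicate mirrors the one used by the new query code:

    package main

    import (
        "fmt"
        "time"
    )

    // overlaps mirrors the query-code predicate: a range matches the window
    // when start < qEnd and end >= qStart.
    func overlaps(start, end, qStart, qEnd time.Time) bool {
        return start.Before(qEnd) && !end.Before(qStart)
    }

    func main() {
        lookaround := time.Hour // e.g. Config.QueryLookaroundPeriod
        qStart := time.Date(2024, 9, 23, 8, 0, 0, 0, time.UTC).Add(-lookaround)
        qEnd := time.Date(2024, 9, 23, 9, 0, 0, 0, time.UTC).Add(lookaround)

        // A block created in the 06:00 partition whose profiles extend to
        // 07:30 still matches the widened window.
        blockMin := time.Date(2024, 9, 23, 6, 0, 0, 0, time.UTC)
        blockMax := time.Date(2024, 9, 23, 7, 30, 0, 0, time.UTC)
        fmt.Println(overlaps(blockMin, blockMax, qStart, qEnd)) // true
    }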
-func (i *Index) FindBlocksInRange(tx *bbolt.Tx, start, end int64, tenants map[string]struct{}) []*metastorev1.BlockMeta { - i.partitionMu.Lock() - defer i.partitionMu.Unlock() - startWithLookaround := start - i.config.QueryLookaroundPeriod.Milliseconds() - endWithLookaround := end + i.config.QueryLookaroundPeriod.Milliseconds() - - blocks := make([]*metastorev1.BlockMeta, 0) - - for _, meta := range i.allPartitions { // TODO aleks-p: consider using binary search to find a good starting point - if meta.overlaps(startWithLookaround, endWithLookaround) { - for t := range tenants { - if !meta.HasTenant(t) { - continue - } - p := i.getOrLoadPartition(tx, meta, t) - tenantBlocks := i.collectTenantBlocks(p, start, end) - blocks = append(blocks, tenantBlocks...) - - // return mixed blocks as well, we rely on the caller to filter out the data per tenant / service - p = i.getOrLoadPartition(tx, meta, "") - tenantBlocks = i.collectTenantBlocks(p, start, end) - blocks = append(blocks, tenantBlocks...) - } + k := tenantPartitionKey{partition: p.Key, tenant: tenant} + partition, ok := i.loadedPartitions[k] + if !ok { + // Read from store. + partition = newIndexPartition(p, tenant) + if err := partition.loadTenant(tx, i.store, tenant); err != nil { + return nil, err } - } - - return blocks -} - -func (i *Index) sortPartitions() { - slices.SortFunc(i.allPartitions, func(a, b *PartitionMeta) int { - return a.compare(b) - }) -} - -func (i *Index) collectTenantBlocks(p *indexPartition, start, end int64) []*metastorev1.BlockMeta { - blocks := make([]*metastorev1.BlockMeta, 0) - for _, s := range p.shards { - for _, block := range s.blocks { - if start < block.MaxTime && end >= block.MinTime { - clone := block.CloneVT() - blocks = append(blocks, clone) - } + if len(partition.shards) == 0 { + return nil, nil } + i.unloadPartitions() + i.loadedPartitions[k] = partition } - return blocks + partition.accessedAt = time.Now() + return partition.shards[shard], nil } // ReplaceBlocks removes source blocks from the index and inserts replacement blocks into the index. The intended usage // is for block compaction. The replacement blocks could be added to the same or a different partition. 
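For context on the ReplaceBlocks doc comment above: the method consumes a single CompactedBlocks message that names the source blocks to drop and the higher-level blocks that replace them. A rough sketch of the message shape, with made-up identifiers rather than real ULIDs:

    package main

    import (
        "fmt"

        metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1"
    )

    func main() {
        compacted := &metastorev1.CompactedBlocks{
            // Level-0 blocks to remove from the index.
            SourceBlocks: &metastorev1.BlockList{
                Tenant: "tenant-a",
                Shard:  3,
                Blocks: []string{"block-1", "block-2"},
            },
            // The compacted block that replaces them.
            NewBlocks: []*metastorev1.BlockMeta{
                {Id: "block-3", Shard: 3, CompactionLevel: 1},
            },
        }
        fmt.Println(len(compacted.SourceBlocks.Blocks), "->", len(compacted.NewBlocks))
    }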
func (i *Index) ReplaceBlocks(tx *bbolt.Tx, compacted *metastorev1.CompactedBlocks) error { - i.partitionMu.Lock() - defer i.partitionMu.Unlock() - if err := i.insertBlocks(tx, compacted.NewBlocks); err != nil { - return err - } - return i.deleteBlockList(tx, compacted.SourceBlocks) -} - -func (i *Index) ReplaceBlocksNoCheckNoPersist(tx *bbolt.Tx, compacted *metastorev1.CompactedBlocks) error { - i.partitionMu.Lock() - defer i.partitionMu.Unlock() + i.mu.Lock() + defer i.mu.Unlock() for _, b := range compacted.NewBlocks { - i.insertBlock(tx, b) - } - source := compacted.SourceBlocks - for _, b := range source.Blocks { - i.deleteBlock(source.Shard, source.Tenant, b) - } - return nil -} - -func (i *Index) insertBlocks(tx *bbolt.Tx, blocks []*metastorev1.BlockMeta) error { - for _, b := range blocks { - k := store.CreatePartitionKey(b.Id, i.config.PartitionDuration) - i.insertBlock(tx, b) - if err := i.store.StoreBlock(tx, k, b); err != nil { + if err := i.insertBlock(tx, b); err != nil { + if errors.Is(err, ErrBlockExists) { + continue + } return err } } - return nil + return i.deleteBlockList(tx, compacted.SourceBlocks) } func (i *Index) deleteBlockList(tx *bbolt.Tx, list *metastorev1.BlockList) error { - partitions := make(map[store.PartitionKey]*metastorev1.BlockList) - for _, block := range list.Blocks { - k := store.CreatePartitionKey(block, i.config.PartitionDuration) - v := partitions[k] - if v == nil { - v = &metastorev1.BlockList{ - Shard: list.Shard, - Tenant: list.Tenant, - Blocks: make([]string, 0, len(list.Blocks)), - } - partitions[k] = v - } - v.Blocks = append(v.Blocks, block) - } - for k, partitioned := range partitions { + for k, partitioned := range i.partitionedList(list) { if err := i.store.DeleteBlockList(tx, k, partitioned); err != nil { return err } - ck := cacheKey{partitionKey: k, tenant: list.Tenant} - loaded := i.loadedPartitions[ck] - if loaded == nil { + p := i.findPartition(k) + if p == nil { continue } - shard := loaded.shards[partitioned.Shard] - if shard == nil { + s, err := i.getOrLoadTenantShard(tx, p, partitioned.Tenant, partitioned.Shard) + if err != nil { + return err + } + if s == nil { continue } for _, b := range partitioned.Blocks { - delete(shard.blocks, b) + delete(s.blocks, b) } } return nil } -// deleteBlock deletes a block from the index. It is the caller's responsibility to enforce safe concurrent access. 
-func (i *Index) deleteBlock(shard uint32, tenant string, blockId string) { - // first try the currently mapped partition - key := store.CreatePartitionKey(blockId, i.config.PartitionDuration) - if ok := i.tryDelete(key, shard, tenant, blockId); ok { - return - } - - // now try all other possible partitions - t := ulid.Time(ulid.MustParse(blockId).Time()).UTC().UnixMilli() - - for _, p := range i.allPartitions { - if p.contains(t) { - if ok := i.tryDelete(p.Key, shard, tenant, blockId); ok { - return - } - } - } -} - -func (i *Index) tryDelete(key store.PartitionKey, shard uint32, tenant string, blockId string) bool { - meta := i.findPartitionMeta(key) - if meta == nil { - return false - } - - cKey := cacheKey{ - partitionKey: key, - tenant: tenant, - } - p, ok := i.loadedPartitions[cKey] - if !ok { - return false - } - - s, ok := p.shards[shard] - if !ok { - return false - } - - if s.blocks[blockId] != nil { - delete(s.blocks, blockId) - return true - } - - return false -} - -func (i *Index) FindPartitionMetas(blockId string) []*PartitionMeta { - i.partitionMu.Lock() - defer i.partitionMu.Unlock() - ts := ulid.Time(ulid.MustParse(blockId).Time()).UTC().UnixMilli() - - metas := make([]*PartitionMeta, 0) - for _, p := range i.allPartitions { - if p.contains(ts) { - metas = append(metas, p) - } - } - return metas -} - func (i *Index) unloadPartitions() { tenantPartitions := make(map[string][]*indexPartition) excessPerTenant := make(map[string]int) @@ -604,15 +383,15 @@ func (i *Index) unloadPartitions() { }) level.Debug(i.logger).Log("msg", "unloading metastore index partitions", "tenant", t, "to_remove", len(partitions)) for _, p := range partitions { - if p.meta.contains(time.Now().UTC().UnixMilli()) { + if i.shouldKeepPartition(p.partition) { continue } - level.Debug(i.logger).Log("unloading metastore index partition", "key", p.meta.Key, "accessed_at", p.accessedAt.Format(time.RFC3339)) - cKey := cacheKey{ - partitionKey: p.meta.Key, - tenant: t, + level.Debug(i.logger).Log("unloading metastore index partition", "key", p.partition.Key, "accessed_at", p.accessedAt.Format(time.RFC3339)) + k := tenantPartitionKey{ + partition: p.partition.Key, + tenant: t, } - delete(i.loadedPartitions, cKey) + delete(i.loadedPartitions, k) toRemove-- if toRemove == 0 { break @@ -621,11 +400,180 @@ func (i *Index) unloadPartitions() { } } -func (i *Index) Init(tx *bbolt.Tx) error { - return i.store.CreateBuckets(tx) +func (i *Index) GetTenantStats(tenant string) *metastorev1.TenantStats { + stats := &metastorev1.TenantStats{ + DataIngested: false, + OldestProfileTime: math.MaxInt64, + NewestProfileTime: math.MinInt64, + } + + i.mu.Lock() + defer i.mu.Unlock() + + for _, p := range i.partitions { + if !p.HasTenant(tenant) { + continue + } + oldest := p.StartTime().UnixMilli() + newest := p.EndTime().UnixMilli() + stats.DataIngested = true + if oldest < stats.OldestProfileTime { + stats.OldestProfileTime = oldest + } + if newest > stats.NewestProfileTime { + stats.NewestProfileTime = newest + } + } + + return stats } -func (i *Index) Restore(tx *bbolt.Tx) error { - i.LoadPartitions(tx) +func (i *Index) QueryMetadata(tx *bbolt.Tx, query MetadataQuery) iter.Iterator[*metastorev1.BlockMeta] { + q, err := newMetadataQuery(i, query) + if err != nil { + return iter.NewErrIterator[*metastorev1.BlockMeta](err) + } + i.mu.Lock() + defer i.mu.Unlock() + // TODO(kolesnikovae): We collect blocks with the mutex held, which + // will cause contention and latency issues. 
Fix it once we make + // locks more granular (partition-tenant-shard level). + metas, err := iter.Slice[*metastorev1.BlockMeta](q.iterator(tx)) + if err != nil { + return iter.NewErrIterator[*metastorev1.BlockMeta](err) + } + return iter.NewSliceIterator(metas) +} + +func (i *Index) shardIterator(tx *bbolt.Tx, startTime, endTime time.Time, tenants ...string) iter.Iterator[*indexShard] { + startTime = startTime.Add(-i.config.QueryLookaroundPeriod) + endTime = endTime.Add(i.config.QueryLookaroundPeriod) + si := shardIterator{ + tx: tx, + partitions: make([]*store.Partition, 0, len(i.partitions)), + tenants: tenants, + index: i, + } + for _, p := range i.partitions { + if !p.Overlaps(startTime, endTime) { + continue + } + for _, t := range si.tenants { + if p.HasTenant(t) { + si.partitions = append(si.partitions, p) + break + } + } + } + return &si +} + +func newIndexPartition(p *store.Partition, tenant string) *indexPartition { + return &indexPartition{ + partition: p, + tenant: tenant, + shards: make(map[uint32]*indexShard), + accessedAt: time.Now(), + } +} + +func (p *indexPartition) loadTenant(tx *bbolt.Tx, store Store, tenant string) error { + for shard := range p.partition.TenantShards[tenant] { + s, err := store.LoadTenantShard(tx, p.partition.Key, tenant, shard) + if err != nil { + return err + } + if len(s.Blocks) > 0 { + p.shards[shard] = newIndexShard(s) + } + } return nil } + +func newIndexShard(s *store.TenantShard) *indexShard { + x := &indexShard{ + blocks: make(map[string]*metastorev1.BlockMeta), + TenantShard: s, + } + for _, md := range s.Blocks { + x.blocks[md.Id] = md + } + return x +} + +func (s *indexShard) insert(tx *bbolt.Tx, x Store, md *metastorev1.BlockMeta) error { + if _, ok := s.blocks[md.Id]; ok { + return ErrBlockExists + } + s.blocks[md.Id] = md + return x.StoreBlock(tx, s.TenantShard, md) +} + +func (s *indexShard) getBlock(blockID string) *metastorev1.BlockMeta { + md, ok := s.blocks[blockID] + if !ok { + return nil + } + mdCopy := md.CloneVT() + s.TenantShard.StringTable.Export(mdCopy) + return mdCopy +} + +type shardIterator struct { + tx *bbolt.Tx + index *Index + tenants []string + partitions []*store.Partition + shards []*indexShard + cur int + err error +} + +func (si *shardIterator) Close() error { return nil } + +func (si *shardIterator) Err() error { return si.err } + +func (si *shardIterator) At() *indexShard { return si.shards[si.cur] } + +func (si *shardIterator) Next() bool { + if n := si.cur + 1; n < len(si.shards) { + si.cur = n + return true + } + si.cur = 0 + si.shards = si.shards[:0] + for len(si.shards) == 0 && len(si.partitions) > 0 { + si.loadShards(si.partitions[0]) + si.partitions = si.partitions[1:] + } + return si.cur < len(si.shards) +} + +func (si *shardIterator) loadShards(p *store.Partition) { + for _, t := range si.tenants { + shards := p.TenantShards[t] + if shards == nil { + continue + } + for s := range shards { + shard, err := si.index.getOrLoadTenantShard(si.tx, p, t, s) + if err != nil { + si.err = err + return + } + if shard != nil { + si.shards = append(si.shards, shard) + } + } + } + slices.SortFunc(si.shards, compareShards) + si.shards = slices.Compact(si.shards) +} + +func compareShards(a, b *indexShard) int { + cmp := strings.Compare(a.Tenant, b.Tenant) + if cmp == 0 { + return int(a.Shard) - int(b.Shard) + } + return cmp +} diff --git a/pkg/experiment/metastore/index/index_test.go b/pkg/experiment/metastore/index/index_test.go deleted file mode 100644 index f293ab506a..0000000000 --- 
a/pkg/experiment/metastore/index/index_test.go +++ /dev/null @@ -1,381 +0,0 @@ -package index_test - -import ( - "context" - "crypto/rand" - "sync" - "testing" - "time" - - "github.com/oklog/ulid" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" - "go.etcd.io/bbolt" - - metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" - "github.com/grafana/pyroscope/pkg/experiment/metastore/index" - "github.com/grafana/pyroscope/pkg/experiment/metastore/index/store" - "github.com/grafana/pyroscope/pkg/test" - "github.com/grafana/pyroscope/pkg/test/mocks/mockindex" - "github.com/grafana/pyroscope/pkg/util" -) - -func TestIndex_FindBlocksInRange(t *testing.T) { - tests := []struct { - name string - blocks []*metastorev1.BlockMeta - queryStart int64 - queryEnd int64 - want int - }{ - { - name: "matching blocks", - blocks: []*metastorev1.BlockMeta{ - createBlock("20240923T06.1h", 0), - createBlock("20240923T07.1h", 0), - createBlock("20240923T08.1h", 0), - createBlock("20240923T09.1h", 0), - createBlock("20240923T10.1h", 0), - }, - queryStart: test.Time("2024-09-23T08:00:00.000Z"), - queryEnd: test.Time("2024-09-23T09:00:00.000Z"), - want: 2, - }, - { - name: "no matching blocks", - blocks: []*metastorev1.BlockMeta{ - createBlock("20240923T06.1h", 0), - createBlock("20240923T07.1h", 0), - createBlock("20240923T08.1h", 0), - createBlock("20240923T09.1h", 0), - createBlock("20240923T10.1h", 0), - }, - queryStart: test.Time("2024-09-23T04:00:00.000Z"), - queryEnd: test.Time("2024-09-23T05:00:00.000Z"), - want: 0, - }, - { - name: "out of order ingestion (behind on time)", - blocks: []*metastorev1.BlockMeta{ - createBlock("20240923T06.1h", 0), - createBlock("20240923T07.1h", -1*time.Hour), // in range - createBlock("20240923T07.1h", -2*time.Hour), // in range - createBlock("20240923T07.1h", -3*time.Hour), // too old - createBlock("20240923T08.1h", -3*time.Hour), // // technically in range but we will not look here - createBlock("20240923T10.1h", 0), - }, - queryStart: test.Time("2024-09-23T05:00:00.000Z"), - queryEnd: test.Time("2024-09-23T06:00:00.000Z"), - want: 3, - }, - { - name: "out of order ingestion (ahead of time)", - blocks: []*metastorev1.BlockMeta{ - createBlock("20240923T06.1h", 2*time.Hour), // technically in range but we will not look here - createBlock("20240923T07.1h", 1*time.Hour), // in range - createBlock("20240923T07.1h", 3*time.Hour), // too new - createBlock("20240923T08.1h", 0), // in range - createBlock("20240923T08.1h", 1*time.Hour), // in range - createBlock("20240923T10.1h", 0), - }, - queryStart: test.Time("2024-09-23T08:00:00.000Z"), - queryEnd: test.Time("2024-09-23T09:00:00.000Z"), - want: 3, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - mockStore := mockindex.NewMockStore(t) - mockStore.On("ListShards", mock.Anything, mock.Anything).Return([]uint32{}) - i := index.NewIndex(util.Logger, mockStore, &index.Config{ - PartitionDuration: time.Hour, - PartitionCacheSize: 24, - QueryLookaroundPeriod: time.Hour, - }) - for _, b := range tt.blocks { - i.InsertBlockNoCheckNoPersist(nil, b) - } - tenantMap := map[string]struct{}{"tenant-1": {}} - found := i.FindBlocksInRange(nil, tt.queryStart, tt.queryEnd, tenantMap) - require.Equal(t, tt.want, len(found)) - for _, b := range found { - require.Truef( - t, - tt.queryStart < b.MaxTime && tt.queryEnd >= b.MinTime, - "block %s is not in range, %v : %v", b.Id, time.UnixMilli(b.MinTime).UTC(), time.UnixMilli(b.MaxTime).UTC()) 
- } - }) - } -} - -func TestIndex_FindBlocks(t *testing.T) { - mockStore := mockindex.NewMockStore(t) - mockStore.On("ListShards", mock.Anything, mock.Anything).Return([]uint32{}) - i := index.NewIndex(util.Logger, mockStore, &index.Config{ - PartitionDuration: time.Hour, - PartitionCacheSize: 24, - QueryLookaroundPeriod: time.Hour, - }) - a := test.ULID("2024-09-21T08:00:00.123Z") - b := test.ULID("2024-09-22T08:00:00.123Z") - c := test.ULID("2024-09-23T08:00:00.123Z") - i.InsertBlockNoCheckNoPersist(nil, &metastorev1.BlockMeta{Id: a}) - i.InsertBlockNoCheckNoPersist(nil, &metastorev1.BlockMeta{Id: b}) - assert.Len(t, i.FindBlocks(nil, &metastorev1.BlockList{Blocks: []string{a, b, c}}), 2) -} - -func mockPartition(store *mockindex.MockStore, key store.PartitionKey, blocks []*metastorev1.BlockMeta) { - store.On("ListShards", mock.Anything, key).Return([]uint32{0}).Maybe() - store.On("ListTenants", mock.Anything, key, uint32(0)).Return([]string{""}).Maybe() - store.On("ListBlocks", mock.Anything, key, uint32(0), "").Return(blocks).Maybe() -} - -func TestIndex_ForEachPartition(t *testing.T) { - mockStore := mockindex.NewMockStore(t) - i := index.NewIndex(util.Logger, mockStore, &index.Config{PartitionDuration: time.Hour}) - - keys := []store.PartitionKey{ - "20240923T06.1h", - "20240923T07.1h", - "20240923T08.1h", - "20240923T09.1h", - "20240923T10.1h", - } - mockStore.On("ListPartitions", mock.Anything).Return(keys) - for _, key := range keys { - mockPartition(mockStore, key, nil) - } - i.LoadPartitions(nil) - - visited := make(map[store.PartitionKey]struct{}) - var mu sync.Mutex - err := i.ForEachPartition(context.Background(), func(meta *index.PartitionMeta) error { - mu.Lock() - visited[meta.Key] = struct{}{} - mu.Unlock() - return nil - }) - require.NoError(t, err) - - require.Len(t, visited, 5) -} - -func TestIndex_InsertBlock(t *testing.T) { - mockStore := mockindex.NewMockStore(t) - mockStore.On("ListShards", mock.Anything, mock.Anything).Return([]uint32{}) - i := index.NewIndex(util.Logger, mockStore, &index.Config{PartitionDuration: time.Hour, PartitionCacheSize: 1}) - block := &metastorev1.BlockMeta{ - Id: test.ULID("2024-09-23T08:00:00.123Z"), - TenantId: "tenant-1", - MinTime: test.Time("2024-09-23T08:00:00.000Z"), - MaxTime: test.Time("2024-09-23T08:05:00.000Z"), - } - - i.InsertBlockNoCheckNoPersist(nil, block) - require.NotNil(t, i.FindBlock(nil, 0, "tenant-1", block.Id)) - blocks := i.FindBlocksInRange(nil, test.Time("2024-09-23T07:00:00.000Z"), test.Time("2024-09-23T09:00:00.000Z"), map[string]struct{}{"tenant-1": {}}) - require.Len(t, blocks, 1) - require.Equal(t, block, blocks[0]) - - // inserting the block again is a noop - i.InsertBlockNoCheckNoPersist(nil, block) - blocks = i.FindBlocksInRange(nil, test.Time("2024-09-23T07:00:00.000Z"), test.Time("2024-09-23T09:00:00.000Z"), map[string]struct{}{"tenant-1": {}}) - require.Len(t, blocks, 1) - require.Equal(t, block, blocks[0]) -} - -func TestIndex_LoadPartitions(t *testing.T) { - mockStore := mockindex.NewMockStore(t) - config := &index.Config{PartitionDuration: time.Hour, PartitionCacheSize: 1} - i := index.NewIndex(util.Logger, mockStore, config) - - blocks := make([]*metastorev1.BlockMeta, 0, 420) - for i := 0; i < 420; i++ { - block := &metastorev1.BlockMeta{ - Id: ulid.MustNew(ulid.Now(), rand.Reader).String(), - Shard: 0, - } - blocks = append(blocks, block) - } - - partitionKey := store.CreatePartitionKey(blocks[0].Id, config.PartitionDuration) - mockStore.On("ListPartitions", 
mock.Anything).Return([]store.PartitionKey{partitionKey}) - mockStore.On("ListShards", mock.Anything, mock.Anything).Return([]uint32{0}) - mockStore.On("ListTenants", mock.Anything, mock.Anything, mock.Anything).Return([]string{""}) - mockStore.On("ListBlocks", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(blocks) - - // restore from store - i.LoadPartitions(nil) - - for _, b := range blocks { - require.NotNilf(t, i.FindBlock(nil, b.Shard, b.TenantId, b.Id), "block %s not found", b.Id) - } -} - -func TestIndex_ReplaceBlocks(t *testing.T) { - mockStore := mockindex.NewMockStore(t) - mockStore.On("ListShards", mock.Anything, mock.Anything).Return([]uint32{}) - i := index.NewIndex(util.Logger, mockStore, &index.DefaultConfig) - b1 := &metastorev1.BlockMeta{ - Id: test.ULID("2024-09-23T08:00:00.123Z"), - } - i.InsertBlockNoCheckNoPersist(nil, b1) - b2 := &metastorev1.BlockMeta{ - Id: test.ULID("2024-09-23T08:00:00.123Z"), - } - i.InsertBlockNoCheckNoPersist(nil, b2) - - replacement := &metastorev1.BlockMeta{ - Id: test.ULID("2024-09-23T08:00:00.123Z"), - CompactionLevel: 1, - TenantId: "tenant-1", - } - - compacted := &metastorev1.CompactedBlocks{ - SourceBlocks: &metastorev1.BlockList{ - Tenant: "", - Shard: 0, - Blocks: []string{b1.Id, b2.Id}, - }, - NewBlocks: []*metastorev1.BlockMeta{replacement}, - } - - require.NoError(t, i.ReplaceBlocksNoCheckNoPersist(nil, compacted)) - require.Nil(t, i.FindBlock(nil, 0, "", b1.Id)) - require.Nil(t, i.FindBlock(nil, 0, "", b2.Id)) - require.NotNil(t, i.FindBlock(nil, 0, "tenant-1", replacement.Id)) -} - -func TestIndex_DurationChange(t *testing.T) { - mockStore := mockindex.NewMockStore(t) - mockStore.On("ListShards", mock.Anything, mock.Anything).Return([]uint32{}) - config := &index.Config{PartitionDuration: 24 * time.Hour, PartitionCacheSize: 1} - i := index.NewIndex(util.Logger, mockStore, config) - b := &metastorev1.BlockMeta{ - Id: test.ULID("2024-09-23T08:00:00.123Z"), - } - i.InsertBlockNoCheckNoPersist(nil, b) - require.NotNil(t, i.FindBlock(nil, 0, "", b.Id)) - - config.PartitionDuration = time.Hour - require.NotNil(t, i.FindBlock(nil, 0, "", b.Id)) -} - -func TestIndex_UnloadPartitions(t *testing.T) { - mockStore := mockindex.NewMockStore(t) - i := index.NewIndex(util.Logger, mockStore, &index.Config{PartitionDuration: time.Hour, PartitionCacheSize: 3}) - - keys := []store.PartitionKey{ - "20240923T06.1h", - "20240923T07.1h", - "20240923T08.1h", - "20240923T09.1h", - "20240923T10.1h", - } - mockStore.On("ListPartitions", mock.Anything).Return(keys) - for _, key := range keys { - mockPartition(mockStore, key, nil) - } - i.LoadPartitions(nil) - require.True(t, mockStore.AssertNumberOfCalls(t, "ListShards", 5)) - - for _, key := range keys { - start, _, _ := key.Parse() - for c := 0; c < 10; c++ { - i.FindBlocksInRange(nil, start.UnixMilli(), start.Add(5*time.Minute).UnixMilli(), map[string]struct{}{"": {}}) - } - } - // multiple reads cause a single store access - require.True(t, mockStore.AssertNumberOfCalls(t, "ListShards", 10)) - - for c := 0; c < 10; c++ { - i.FindBlocksInRange(nil, test.Time("2024-09-23T08:00:00.000Z"), test.Time("2024-09-23T08:05:00.000Z"), map[string]struct{}{"": {}}) - } - // this partition is still loaded in memory - require.True(t, mockStore.AssertNumberOfCalls(t, "ListShards", 10)) - - for c := 0; c < 10; c++ { - i.FindBlocksInRange(nil, test.Time("2024-09-23T06:00:00.000Z"), test.Time("2024-09-23T06:05:00.000Z"), map[string]struct{}{"": {}}) - } - // this partition was unloaded - 
require.True(t, mockStore.AssertNumberOfCalls(t, "ListShards", 11)) -} - -func createBlock(key string, offset time.Duration) *metastorev1.BlockMeta { - pKey := store.PartitionKey(key) - ts, _, _ := pKey.Parse() - return &metastorev1.BlockMeta{ - Id: test.ULID(ts.Format(time.RFC3339)), - MinTime: ts.Add(offset).UnixMilli(), - MaxTime: ts.Add(offset).Add(5 * time.Minute).UnixMilli(), - TenantId: "tenant-1", - } -} - -func TestReplaceBlocks_Persistence(t *testing.T) { - db := test.BoltDB(t) - c := &index.Config{ - PartitionDuration: 24 * time.Hour, - PartitionCacheSize: 7, - QueryLookaroundPeriod: time.Hour, - } - md1 := &metastorev1.BlockMeta{ - Id: test.ULID("2024-09-22T08:00:00.123Z"), - Shard: 3, - CompactionLevel: 0, - TenantId: "", - } - md2 := &metastorev1.BlockMeta{ - Id: test.ULID("2024-09-22T08:01:00.123Z"), - Shard: 3, - CompactionLevel: 0, - TenantId: "", - } - md3 := &metastorev1.BlockMeta{ - Id: test.ULID("2024-09-25T09:00:00.123Z"), - Shard: 3, - CompactionLevel: 1, - TenantId: "x1", - } - md4 := &metastorev1.BlockMeta{ - Id: test.ULID("2024-09-25T09:01:00.123Z"), - Shard: 3, - CompactionLevel: 1, - TenantId: "x2", - } - - x := index.NewIndex(util.Logger, index.NewStore(), c) - require.NoError(t, db.Update(x.Init)) - require.NoError(t, db.View(x.Restore)) - - require.NoError(t, db.Update(func(tx *bbolt.Tx) error { - return x.InsertBlock(tx, md1) - })) - require.NoError(t, db.Update(func(tx *bbolt.Tx) error { - return x.InsertBlock(tx, md2) - })) - - require.NoError(t, db.Update(func(tx *bbolt.Tx) error { - return x.ReplaceBlocks(tx, &metastorev1.CompactedBlocks{ - NewBlocks: []*metastorev1.BlockMeta{md3, md4}, - SourceBlocks: &metastorev1.BlockList{ - Tenant: md1.TenantId, - Shard: md1.Shard, - Blocks: []string{md1.Id, md2.Id}, - }, - }) - })) - - x = index.NewIndex(util.Logger, index.NewStore(), c) - require.NoError(t, db.Update(x.Init)) - require.NoError(t, db.View(x.Restore)) - require.NoError(t, db.View(func(tx *bbolt.Tx) error { - require.Nil(t, x.FindBlock(tx, md1.Shard, md1.TenantId, md1.Id)) - require.Nil(t, x.FindBlock(tx, md2.Shard, md2.TenantId, md2.Id)) - require.NotNil(t, x.FindBlock(tx, md3.Shard, md3.TenantId, md3.Id)) - require.NotNil(t, x.FindBlock(tx, md4.Shard, md4.TenantId, md4.Id)) - return nil - })) -} diff --git a/pkg/experiment/metastore/index/partition_meta.go b/pkg/experiment/metastore/index/partition_meta.go deleted file mode 100644 index d080ca0039..0000000000 --- a/pkg/experiment/metastore/index/partition_meta.go +++ /dev/null @@ -1,64 +0,0 @@ -package index - -import ( - "time" - - "github.com/grafana/pyroscope/pkg/experiment/metastore/index/store" -) - -type PartitionMeta struct { - Key store.PartitionKey - Ts time.Time - Duration time.Duration - Tenants []string - - tenantMap map[string]struct{} -} - -func (m *PartitionMeta) HasTenant(tenant string) bool { - m.loadTenants() - _, ok := m.tenantMap[tenant] - return ok -} - -func (m *PartitionMeta) StartTime() time.Time { - return m.Ts -} - -func (m *PartitionMeta) EndTime() time.Time { - return m.Ts.Add(m.Duration) -} - -func (m *PartitionMeta) loadTenants() { - if len(m.Tenants) > 0 && len(m.tenantMap) == 0 { - m.tenantMap = make(map[string]struct{}, len(m.Tenants)) - for _, t := range m.Tenants { - m.tenantMap[t] = struct{}{} - } - } -} - -func (m *PartitionMeta) AddTenant(tenant string) { - m.loadTenants() - if _, ok := m.tenantMap[tenant]; !ok { - m.tenantMap[tenant] = struct{}{} - m.Tenants = append(m.Tenants, tenant) - } -} - -func (m *PartitionMeta) compare(other *PartitionMeta) int { - if m 
== other { - return 0 - } - return m.Ts.Compare(other.Ts) -} - -// [ m.StartTime(), m.EndTime() ) -func (m *PartitionMeta) overlaps(start, end int64) bool { - return start < m.EndTime().UnixMilli() && end >= m.StartTime().UnixMilli() -} - -// [ m.StartTime(), m.EndTime() ) -func (m *PartitionMeta) contains(t int64) bool { - return t >= m.StartTime().UnixMilli() && t < m.EndTime().UnixMilli() -} diff --git a/pkg/experiment/metastore/index/partition_meta_test.go b/pkg/experiment/metastore/index/partition_meta_test.go deleted file mode 100644 index 3177bf6a7e..0000000000 --- a/pkg/experiment/metastore/index/partition_meta_test.go +++ /dev/null @@ -1,104 +0,0 @@ -package index - -import ( - "testing" - "time" - - "github.com/stretchr/testify/assert" -) - -func TestPartitionMeta_overlaps(t *testing.T) { - type args struct { - start time.Time - end time.Time - } - tests := []struct { - name string - meta PartitionMeta - args args - want bool - }{ - { - name: "simple overlap", - meta: PartitionMeta{Ts: createTime("2024-09-11T06:00:00.000Z"), Duration: 6 * time.Hour}, - args: args{ - start: createTime("2024-09-11T07:15:24.123Z"), - end: createTime("2024-09-11T13:15:24.123Z"), - }, - want: true, - }, - { - name: "overlap at partition start", - meta: PartitionMeta{Ts: createTime("2024-09-11T06:00:00.000Z"), Duration: 6 * time.Hour}, - args: args{ - start: createTime("2024-09-11T04:00:00.000Z"), - end: createTime("2024-09-11T06:00:00.000Z"), - }, - want: true, - }, - { - name: "no overlap close to partition start", - meta: PartitionMeta{Ts: createTime("2024-09-11T06:00:00.000Z"), Duration: 6 * time.Hour}, - args: args{ - start: createTime("2024-09-11T04:00:00.000Z"), - end: createTime("2024-09-11T05:59:59.999Z"), - }, - want: false, - }, - { - name: "overlap at partition end", - meta: PartitionMeta{Ts: createTime("2024-09-11T06:00:00.000Z"), Duration: 6 * time.Hour}, - args: args{ - start: createTime("2024-09-11T11:59:59.999Z"), - end: createTime("2024-09-11T13:00:00.000Z"), - }, - want: true, - }, - { - name: "no overlap close to partition end", - meta: PartitionMeta{Ts: createTime("2024-09-11T06:00:00.000Z"), Duration: 6 * time.Hour}, - args: args{ - start: createTime("2024-09-11T12:00:00.000Z"), - end: createTime("2024-09-11T13:59:59.999Z"), - }, - want: false, - }, - { - name: "overlap around midnight", - meta: PartitionMeta{Ts: createTime("2024-09-11T00:00:00.000Z"), Duration: 6 * time.Hour}, - args: args{ - start: createTime("2024-09-10T19:00:00.000Z"), - end: createTime("2024-09-11T00:01:01.999Z"), - }, - want: true, - }, - { - name: "partition fully contains interval", - meta: PartitionMeta{Ts: createTime("2024-09-11T06:00:00.000Z"), Duration: 6 * time.Hour}, - args: args{ - start: createTime("2024-09-11T07:00:00.000Z"), - end: createTime("2024-09-11T08:01:01.999Z"), - }, - want: true, - }, - { - name: "interval fully contains partition", - meta: PartitionMeta{Ts: createTime("2024-09-11T06:00:00.000Z"), Duration: 6 * time.Hour}, - args: args{ - start: createTime("2024-09-11T02:00:00.000Z"), - end: createTime("2024-09-11T13:01:01.999Z"), - }, - want: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - assert.Equalf(t, tt.want, tt.meta.overlaps(tt.args.start.UnixMilli(), tt.args.end.UnixMilli()), "overlaps(%v, %v)", tt.args.start, tt.args.end) - }) - } -} - -func createTime(t string) time.Time { - ts, _ := time.Parse(time.RFC3339, t) - return ts -} diff --git a/pkg/experiment/metastore/index/query.go b/pkg/experiment/metastore/index/query.go new file mode 100644 
index 0000000000..eaa6be40e3 --- /dev/null +++ b/pkg/experiment/metastore/index/query.go @@ -0,0 +1,193 @@ +package index + +import ( + "fmt" + "slices" + "sort" + "strings" + "time" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql/parser" + "go.etcd.io/bbolt" + + metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" + "github.com/grafana/pyroscope/pkg/experiment/block" + "github.com/grafana/pyroscope/pkg/iter" + "github.com/grafana/pyroscope/pkg/model" +) + +type InvalidQueryError struct { + Query MetadataQuery + Err error +} + +func (e *InvalidQueryError) Error() string { + return fmt.Sprintf("invalid query: %v: %v", e.Query, e.Err) +} + +type MetadataQuery struct { + Expr string + StartTime time.Time + EndTime time.Time + Tenant []string +} + +func (q *MetadataQuery) String() string { + return fmt.Sprintf("start: %v, end: %v, tenants: %v, expr: %v", + q.StartTime, + q.EndTime, + strings.Join(q.Tenant, ","), + q.Expr) +} + +type metadataQuery struct { + startTime time.Time + endTime time.Time + tenants []string + index *Index + // Currently, we only check if the dataset name matches + // the service name label. We could extend this to match + // any labels of a dataset. + matcher *labels.Matcher +} + +func newMetadataQuery(index *Index, query MetadataQuery) (*metadataQuery, error) { + if len(query.Tenant) == 0 { + return nil, &InvalidQueryError{Query: query, Err: fmt.Errorf("tenant_id is required")} + } + selectors, err := parser.ParseMetricSelector(query.Expr) + if err != nil { + return nil, &InvalidQueryError{Query: query, Err: fmt.Errorf("failed to parse label selectors: %w", err)} + } + // TODO: Validate the time range. + q := &metadataQuery{ + startTime: query.StartTime, + endTime: query.EndTime, + index: index, + } + q.buildTenantList(query.Tenant) + for _, m := range selectors { + if m.Name == model.LabelNameServiceName && q.matcher == nil { + q.matcher = m + break + } + } + // We could also validate that the service has the profile type + // queried, but that's not really necessary: querying an irrelevant + // profile type is rather a rare/invalid case. + return q, nil +} + +func (q *metadataQuery) buildTenantList(tenants []string) { + m := make(map[string]struct{}, len(tenants)+1) + for _, t := range tenants { + m[t] = struct{}{} + } + m[""] = struct{}{} + q.tenants = make([]string, 0, len(m)) + for t := range m { + q.tenants = append(q.tenants, t) + } + sort.Strings(q.tenants) +} + +func (q *metadataQuery) iterator(tx *bbolt.Tx) *iterator { + shards := q.index.shardIterator(tx, q.startTime, q.endTime, q.tenants...) 
+ si := iterator{ + query: q, + tenants: make(map[string]struct{}, len(q.tenants)), + shards: shards, + } + for _, t := range q.tenants { + si.tenants[t] = struct{}{} + } + return &si +} + +type iterator struct { + query *metadataQuery + tenants map[string]struct{} + shards iter.Iterator[*indexShard] + metas []*metastorev1.BlockMeta + cur int +} + +func (mi *iterator) Close() error { return mi.shards.Close() } +func (mi *iterator) Err() error { return mi.shards.Err() } +func (mi *iterator) At() *metastorev1.BlockMeta { return mi.metas[mi.cur] } + +func (mi *iterator) Next() bool { + if n := mi.cur + 1; n < len(mi.metas) { + mi.cur = n + return true + } + mi.cur = 0 + mi.metas = mi.metas[:0] + for mi.shards.Next() { + if mi.copyMatched(mi.shards.At()) { + break + } + } + return len(mi.metas) > 0 +} + +func (mi *iterator) copyMatched(shard *indexShard) bool { + for _, md := range shard.blocks { + if match := mi.metadataMatch(shard.StringTable, md); match != nil { + mi.metas = append(mi.metas, match) + } + } + slices.SortFunc(mi.metas, func(a, b *metastorev1.BlockMeta) int { + return strings.Compare(a.Id, b.Id) + }) + return len(mi.metas) > 0 +} + +func (mi *iterator) metadataMatch(s *block.MetadataStrings, md *metastorev1.BlockMeta) *metastorev1.BlockMeta { + if !mi.query.overlaps(time.UnixMilli(md.MinTime), time.UnixMilli(md.MaxTime)) { + return nil + } + var mdCopy *metastorev1.BlockMeta + datasets := md.Datasets + for _, ds := range datasets { + if mi.datasetMatches(s, ds) { + if mdCopy == nil { + mdCopy = cloneMetadataForQuery(md) + } + mdCopy.Datasets = append(mdCopy.Datasets, ds.CloneVT()) + } + } + if mdCopy != nil { + s.Export(mdCopy) + } + // May be nil. + return mdCopy +} + +func (mi *iterator) datasetMatches(s *block.MetadataStrings, ds *metastorev1.Dataset) bool { + if _, ok := mi.tenants[s.Lookup(ds.Tenant)]; !ok { + return false + } + if !mi.query.overlaps(time.UnixMilli(ds.MinTime), time.UnixMilli(ds.MaxTime)) { + return false + } + // TODO: Cache; we shouldn't check the same name multiple times. 
+ if mi.query.matcher != nil { + return mi.query.matcher.Matches(s.Lookup(ds.Name)) + } + return true +} + +func (q *metadataQuery) overlaps(start, end time.Time) bool { + return start.Before(q.endTime) && !end.Before(q.startTime) +} + +func cloneMetadataForQuery(b *metastorev1.BlockMeta) *metastorev1.BlockMeta { + datasets := b.Datasets + b.Datasets = nil + c := b.CloneVT() + b.Datasets = datasets + c.Datasets = make([]*metastorev1.Dataset, 0, len(b.Datasets)) + return c +} diff --git a/pkg/experiment/metastore/index/query_test.go b/pkg/experiment/metastore/index/query_test.go new file mode 100644 index 0000000000..f389aef319 --- /dev/null +++ b/pkg/experiment/metastore/index/query_test.go @@ -0,0 +1,191 @@ +package index + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.etcd.io/bbolt" + + metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" + "github.com/grafana/pyroscope/pkg/iter" + "github.com/grafana/pyroscope/pkg/test" + "github.com/grafana/pyroscope/pkg/util" +) + +func TestIndex_Query(t *testing.T) { + db := test.BoltDB(t) + + minT := test.UnixMilli("2024-09-23T08:00:00.000Z") + maxT := test.UnixMilli("2024-09-23T09:00:00.000Z") + + md := &metastorev1.BlockMeta{ + Id: test.ULID("2024-09-23T08:00:00.001Z"), + Tenant: 0, + MinTime: minT, + MaxTime: maxT, + CreatedBy: 1, + Datasets: []*metastorev1.Dataset{ + {Tenant: 2, Name: 3, ProfileTypes: []int32{4, 5, 6}, MinTime: minT, MaxTime: minT}, + {Tenant: 7, Name: 8, ProfileTypes: []int32{5, 6, 9}, MinTime: maxT, MaxTime: maxT}, + }, + StringTable: []string{ + "", "ingester", + "tenant-a", "dataset-a", "1", "2", "3", + "tenant-b", "dataset-b", "4", + }, + } + + md2 := &metastorev1.BlockMeta{ + Id: test.ULID("2024-09-23T08:00:00.002Z"), + Tenant: 1, + Shard: 1, + MinTime: minT, + MaxTime: maxT, + CreatedBy: 2, + Datasets: []*metastorev1.Dataset{ + {Tenant: 1, Name: 3, ProfileTypes: []int32{4, 5, 6}, MinTime: minT, MaxTime: minT}, + }, + StringTable: []string{ + "", "tenant-a", "ingester", + "dataset-a", "1", "2", "3", + }, + } + + md3 := &metastorev1.BlockMeta{ + Id: test.ULID("2024-09-23T08:30:00.003Z"), + Tenant: 1, + Shard: 1, + MinTime: minT, + MaxTime: maxT, + CreatedBy: 2, + Datasets: []*metastorev1.Dataset{ + {Tenant: 1, Name: 3, ProfileTypes: []int32{4, 5, 6}, MinTime: minT, MaxTime: minT}, + }, + StringTable: []string{ + "", "tenant-a", "ingester", + "dataset-a", "1", "2", "3", + }, + } + + query := func(t *testing.T, tx *bbolt.Tx, index *Index) { + t.Run("FindBlocks", func(t *testing.T) { + found, err := index.FindBlocks(tx, &metastorev1.BlockList{Blocks: []string{md.Id}}) + require.NoError(t, err) + require.NotEmpty(t, found) + require.Equal(t, md, found[0]) + + found, err = index.FindBlocks(tx, &metastorev1.BlockList{ + Tenant: "tenant-a", + Shard: 1, + Blocks: []string{md2.Id, md3.Id}, + }) + require.NoError(t, err) + require.NotEmpty(t, found) + require.Equal(t, md2, found[0]) + require.Equal(t, md3, found[1]) + + found, err = index.FindBlocks(tx, &metastorev1.BlockList{ + Tenant: "tenant-b", + Shard: 1, + Blocks: []string{md.Id}, + }) + require.NoError(t, err) + require.Empty(t, found) + + found, err = index.FindBlocks(tx, &metastorev1.BlockList{ + Shard: 1, + Blocks: []string{md.Id}, + }) + require.NoError(t, err) + require.Empty(t, found) + }) + + t.Run("DatasetFilter", func(t *testing.T) { + expected := []*metastorev1.BlockMeta{ + { + Id: md.Id, + Tenant: 0, + MinTime: minT, + MaxTime: maxT, + CreatedBy: 1, + Datasets: []*metastorev1.Dataset{ + {Tenant: 2, Name: 3, 
ProfileTypes: []int32{4, 5, 6}, MinTime: minT, MaxTime: minT}, + }, + StringTable: []string{"", "ingester", "tenant-a", "dataset-a", "1", "2", "3"}, + }, + { + Id: md2.Id, + Tenant: 1, + Shard: 1, + MinTime: minT, + MaxTime: maxT, + CreatedBy: 2, + Datasets: []*metastorev1.Dataset{ + {Tenant: 1, Name: 3, ProfileTypes: []int32{4, 5, 6}, MinTime: minT, MaxTime: minT}, + }, + StringTable: []string{"", "tenant-a", "ingester", "dataset-a", "1", "2", "3"}, + }, + { + Id: md3.Id, + Tenant: 1, + Shard: 1, + MinTime: minT, + MaxTime: maxT, + CreatedBy: 2, + Datasets: []*metastorev1.Dataset{ + {Tenant: 1, Name: 3, ProfileTypes: []int32{4, 5, 6}, MinTime: minT, MaxTime: minT}, + }, + StringTable: []string{"", "tenant-a", "ingester", "dataset-a", "1", "2", "3"}, + }, + } + + found, err := iter.Slice(index.QueryMetadata(tx, MetadataQuery{ + Expr: `{service_name=~"dataset-a"}`, + StartTime: time.UnixMilli(minT), + EndTime: time.UnixMilli(maxT), + Tenant: []string{"tenant-a", "tenant-b"}, + })) + require.NoError(t, err) + require.Equal(t, expected, found) + }) + + t.Run("TimeRangeFilter", func(t *testing.T) { + found, err := iter.Slice(index.QueryMetadata(tx, MetadataQuery{ + Expr: `{service_name=~"dataset-b"}`, + StartTime: time.UnixMilli(minT), + EndTime: time.UnixMilli(maxT), + Tenant: []string{"tenant-b"}, + })) + require.NoError(t, err) + require.Empty(t, found) + }) + } + + idx := NewIndex(util.Logger, NewStore(), &DefaultConfig) + tx, err := db.Begin(true) + require.NoError(t, err) + require.NoError(t, idx.Init(tx)) + require.NoError(t, idx.InsertBlock(tx, md.CloneVT())) + require.NoError(t, idx.InsertBlock(tx, md2.CloneVT())) + require.NoError(t, idx.InsertBlock(tx, md3.CloneVT())) + require.NoError(t, tx.Commit()) + + t.Run("BeforeRestore", func(t *testing.T) { + tx, err := db.Begin(false) + require.NoError(t, err) + query(t, tx, idx) + require.NoError(t, tx.Rollback()) + }) + + t.Run("Restored", func(t *testing.T) { + idx = NewIndex(util.Logger, NewStore(), &DefaultConfig) + tx, err = db.Begin(false) + defer func() { + require.NoError(t, tx.Rollback()) + }() + require.NoError(t, err) + require.NoError(t, idx.Restore(tx)) + query(t, tx, idx) + }) +} diff --git a/pkg/experiment/metastore/index/store/index_store.go b/pkg/experiment/metastore/index/store/index_store.go index 93c03da701..a192a89539 100644 --- a/pkg/experiment/metastore/index/store/index_store.go +++ b/pkg/experiment/metastore/index/store/index_store.go @@ -3,179 +3,298 @@ package store import ( "bytes" "encoding/binary" + "errors" "fmt" "go.etcd.io/bbolt" metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" + "github.com/grafana/pyroscope/pkg/experiment/block" + "github.com/grafana/pyroscope/pkg/experiment/metastore/store" + "github.com/grafana/pyroscope/pkg/iter" ) const ( - partitionBucketName = "partition" - emptyTenantBucketName = "-" + partitionBucketName = "partition" + emptyTenantBucketName = "-" + tenantShardStringsBucketName = ".strings" ) var ( - partitionBucketNameBytes = []byte(partitionBucketName) - emptyTenantBucketNameBytes = []byte(emptyTenantBucketName) + ErrInvalidStringTable = errors.New("malformed string table") ) +var ( + partitionBucketNameBytes = []byte(partitionBucketName) + emptyTenantBucketNameBytes = []byte(emptyTenantBucketName) + tenantShardStringsBucketNameBytes = []byte(tenantShardStringsBucketName) +) + +type Entry struct { + Partition PartitionKey + Shard uint32 + Tenant string + BlockID string + BlockMeta *metastorev1.BlockMeta + StringTable *block.MetadataStrings +} + +type 
TenantShard struct { + Partition PartitionKey + Tenant string + Shard uint32 + Blocks []*metastorev1.BlockMeta + StringTable *block.MetadataStrings +} + type IndexStore struct{} -func NewIndexStore() *IndexStore { - return &IndexStore{} +func tenantBucketName(tenant string) []byte { + if tenant == "" { + return emptyTenantBucketNameBytes + } + return []byte(tenant) } -func getPartitionBucket(tx *bbolt.Tx) *bbolt.Bucket { +func getPartitionsBucket(tx *bbolt.Tx) *bbolt.Bucket { return tx.Bucket(partitionBucketNameBytes) } +func getOrCreateSubBucket(parent *bbolt.Bucket, name []byte) (*bbolt.Bucket, error) { + bucket := parent.Bucket(name) + if bucket == nil { + return parent.CreateBucket(name) + } + return bucket, nil +} + +func NewIndexStore() *IndexStore { + return &IndexStore{} +} + func (m *IndexStore) CreateBuckets(tx *bbolt.Tx) error { _, err := tx.CreateBucketIfNotExists(partitionBucketNameBytes) return err } -func (m *IndexStore) StoreBlock(tx *bbolt.Tx, pk PartitionKey, b *metastorev1.BlockMeta) error { - key := []byte(b.Id) - value, err := b.MarshalVT() +func (m *IndexStore) StoreBlock(tx *bbolt.Tx, shard *TenantShard, md *metastorev1.BlockMeta) error { + bucket, err := getOrCreateTenantShard(tx, shard.Partition, shard.Tenant, shard.Shard) if err != nil { return err } - partBkt, err := getOrCreateSubBucket(getPartitionBucket(tx), []byte(pk)) - if err != nil { - return fmt.Errorf("error creating partition bucket for %s: %w", pk, err) + n := len(shard.StringTable.Strings) + shard.StringTable.Import(md) + if added := shard.StringTable.Strings[n:]; len(added) > 0 { + strings, err := getOrCreateSubBucket(bucket, tenantShardStringsBucketNameBytes) + if err != nil { + return err + } + k := binary.BigEndian.AppendUint32(nil, uint32(n)) + v := encodeStrings(added) + if err = strings.Put(k, v); err != nil { + return err + } } - - shardBktName := make([]byte, 4) - binary.BigEndian.PutUint32(shardBktName, b.Shard) - shardBkt, err := getOrCreateSubBucket(partBkt, shardBktName) + md.StringTable = nil + value, err := md.MarshalVT() if err != nil { - return fmt.Errorf("error creating shard bucket for partiton %s and shard %d: %w", pk, b.Shard, err) + return err } + return bucket.Put([]byte(md.Id), value) +} - tenantBktName := []byte(b.TenantId) - if len(tenantBktName) == 0 { - tenantBktName = emptyTenantBucketNameBytes - } - tenantBkt, err := getOrCreateSubBucket(shardBkt, tenantBktName) - if err != nil { - return fmt.Errorf("error creating tenant bucket for partition %s, shard %d and tenant %s: %w", pk, b.Shard, b.TenantId, err) - } +func (m *IndexStore) ListPartitions(tx *bbolt.Tx) ([]*Partition, error) { + var partitions []*Partition + root := getPartitionsBucket(tx) + return partitions, root.ForEachBucket(func(partitionKey []byte) error { + var k PartitionKey + if err := k.UnmarshalBinary(partitionKey); err != nil { + return fmt.Errorf("%w: %x", err, partitionKey) + } - return tenantBkt.Put(key, value) -} + p := NewPartition(k) + partition := root.Bucket(partitionKey) + err := partition.ForEachBucket(func(tenant []byte) error { + shards := make(map[uint32]struct{}) + err := partition.Bucket(tenant).ForEachBucket(func(shard []byte) error { + shards[binary.BigEndian.Uint32(shard)] = struct{}{} + return nil + }) + if err != nil { + return err + } + if bytes.Compare(tenant, emptyTenantBucketNameBytes) == 0 { + tenant = nil + } + p.TenantShards[string(tenant)] = shards + return nil + }) -func (m *IndexStore) DeleteBlockList(tx *bbolt.Tx, pk PartitionKey, list *metastorev1.BlockList) error { - 
partitions := getPartitionBucket(tx) - if partitions == nil { - return nil - } - partition := partitions.Bucket([]byte(pk)) - if partition == nil { - return nil - } - shardBktName := make([]byte, 4) - binary.BigEndian.PutUint32(shardBktName, list.Shard) - shards := partition.Bucket(shardBktName) - if shards == nil { + if err != nil { + return err + } + + partitions = append(partitions, p) return nil - } - tenantBktName := []byte(list.Tenant) - if len(tenantBktName) == 0 { - tenantBktName = emptyTenantBucketNameBytes - } - tenant := shards.Bucket(tenantBktName) - if tenant == nil { + }) +} + +func (m *IndexStore) DeleteBlockList(tx *bbolt.Tx, p PartitionKey, list *metastorev1.BlockList) error { + tenantShard := getTenantShard(tx, p, list.Tenant, list.Shard) + if tenantShard == nil { return nil } for _, b := range list.Blocks { - if err := tenant.Delete([]byte(b)); err != nil { + if err := tenantShard.Delete([]byte(b)); err != nil { return err } } return nil } -func (m *IndexStore) ListPartitions(tx *bbolt.Tx) []PartitionKey { - partitionKeys := make([]PartitionKey, 0) - _ = getPartitionBucket(tx).ForEachBucket(func(name []byte) error { - partitionKeys = append(partitionKeys, PartitionKey(name)) - return nil - }) - return partitionKeys +func getTenantShard(tx *bbolt.Tx, p PartitionKey, tenant string, shard uint32) *bbolt.Bucket { + if partition := getPartitionsBucket(tx).Bucket(p.Bytes()); partition != nil { + if shards := partition.Bucket(tenantBucketName(tenant)); shards != nil { + return shards.Bucket(binary.BigEndian.AppendUint32(nil, shard)) + } + } + return nil } -func (m *IndexStore) ListShards(tx *bbolt.Tx, key PartitionKey) []uint32 { - shards := make([]uint32, 0) - partBkt := getPartitionBucket(tx).Bucket([]byte(key)) - if partBkt == nil { - return nil +func (m *IndexStore) LoadTenantShard(tx *bbolt.Tx, p PartitionKey, tenant string, shard uint32) (*TenantShard, error) { + s, err := m.loadTenantShard(tx, p, tenant, shard) + if err != nil { + return nil, fmt.Errorf("error loading tenant shard %s/%d partition %q: %w", tenant, shard, p, err) } - _ = partBkt.ForEachBucket(func(name []byte) error { - shards = append(shards, binary.BigEndian.Uint32(name)) - return nil - }) - return shards + return s, nil } -func (m *IndexStore) ListTenants(tx *bbolt.Tx, key PartitionKey, shard uint32) []string { - tenants := make([]string, 0) - partBkt := getPartitionBucket(tx).Bucket([]byte(key)) - if partBkt == nil { - return nil +func (m *IndexStore) loadTenantShard(tx *bbolt.Tx, p PartitionKey, tenant string, shard uint32) (*TenantShard, error) { + tenantShard, err := getOrCreateTenantShard(tx, p, tenant, shard) + if err != nil { + return nil, err } - shardBktName := make([]byte, 4) - binary.BigEndian.PutUint32(shardBktName, shard) - shardBkt := partBkt.Bucket(shardBktName) - if shardBkt == nil { - return nil + + s := TenantShard{ + Partition: p, + Tenant: tenant, + Shard: shard, + StringTable: block.NewMetadataStringTable(), + } + + strings := tenantShard.Bucket(tenantShardStringsBucketNameBytes) + if strings == nil { + return &s, nil } - _ = shardBkt.ForEachBucket(func(name []byte) error { - if bytes.Equal(name, emptyTenantBucketNameBytes) { - tenants = append(tenants, "") - } else { - tenants = append(tenants, string(name)) + stringsIter := newStringIter(store.NewCursorIter(nil, strings.Cursor())) + defer func() { + _ = stringsIter.Close() + }() + if err = s.StringTable.Load(stringsIter); err != nil { + return nil, err + } + + err = tenantShard.ForEach(func(k, v []byte) error { + var md 
metastorev1.BlockMeta + if err = md.UnmarshalVT(v); err != nil { + return fmt.Errorf("failed to unmarshal block %q: %w", string(k), err) } + s.Blocks = append(s.Blocks, &md) return nil }) - return tenants + if err != nil { + return nil, err + } + + return &s, nil } -func (m *IndexStore) ListBlocks(tx *bbolt.Tx, key PartitionKey, shard uint32, tenant string) []*metastorev1.BlockMeta { - blocks := make([]*metastorev1.BlockMeta, 0) - partBkt := getPartitionBucket(tx).Bucket([]byte(key)) - if partBkt == nil { - return nil +func getOrCreateTenantShard(tx *bbolt.Tx, p PartitionKey, tenant string, shard uint32) (*bbolt.Bucket, error) { + partition, err := getOrCreateSubBucket(getPartitionsBucket(tx), p.Bytes()) + if err != nil { + return nil, fmt.Errorf("error creating partition bucket for %s: %w", p, err) } - shardBktName := make([]byte, 4) - binary.BigEndian.PutUint32(shardBktName, shard) - shardBkt := partBkt.Bucket(shardBktName) - if shardBkt == nil { - return nil + shards, err := getOrCreateSubBucket(partition, tenantBucketName(tenant)) + if err != nil { + return nil, fmt.Errorf("error creating shard bucket for tenant %s in parititon %v: %w", tenant, p, err) } - tenantBktName := []byte(tenant) - if len(tenantBktName) == 0 { - tenantBktName = emptyTenantBucketNameBytes + tenantShard, err := getOrCreateSubBucket(shards, binary.BigEndian.AppendUint32(nil, shard)) + if err != nil { + return nil, fmt.Errorf("error creating shard bucket for partiton %s and shard %d: %w", p, shard, err) } - tenantBkt := shardBkt.Bucket(tenantBktName) - if tenantBkt == nil { - return nil + return tenantShard, nil +} + +type stringIterator struct { + iter.Iterator[store.KV] + batch []string + cur int + err error +} + +func newStringIter(i iter.Iterator[store.KV]) *stringIterator { + return &stringIterator{Iterator: i} +} + +func (i *stringIterator) Err() error { + if err := i.Iterator.Err(); err != nil { + return err } - _ = tenantBkt.ForEach(func(k, v []byte) error { - var md metastorev1.BlockMeta - if err := md.UnmarshalVT(v); err != nil { - panic(fmt.Sprintf("failed to unmarshal block %q: %v", string(k), err)) - } - blocks = append(blocks, &md) - return nil - }) - return blocks + return i.err } -func getOrCreateSubBucket(parent *bbolt.Bucket, name []byte) (*bbolt.Bucket, error) { - bucket := parent.Bucket(name) - if bucket == nil { - return parent.CreateBucket(name) +func (i *stringIterator) At() string { return i.batch[i.cur] } + +func (i *stringIterator) Next() bool { + if n := i.cur + 1; n < len(i.batch) { + i.cur = n + return true } - return bucket, nil + i.cur = 0 + i.batch = i.batch[:0] + if !i.Iterator.Next() { + return false + } + var err error + if i.batch, err = decodeStrings(i.batch, i.Iterator.At().Value); err != nil { + i.err = err + return false + } + return len(i.batch) > 0 +} + +func encodeStrings(strings []string) []byte { + size := 4 + for _, s := range strings { + size += 4 + len(s) + } + data := make([]byte, 0, size) + data = binary.BigEndian.AppendUint32(data, uint32(len(strings))) + for _, s := range strings { + data = binary.BigEndian.AppendUint32(data, uint32(len(s))) + data = append(data, s...) 
+ } + return data +} + +func decodeStrings(dst []string, data []byte) ([]string, error) { + offset := 0 + if len(data) < offset+4 { + return dst, ErrInvalidStringTable + } + n := binary.BigEndian.Uint32(data[offset:]) + offset += 4 + for i := uint32(0); i < n; i++ { + if len(data) < offset+4 { + return dst, ErrInvalidStringTable + } + size := binary.BigEndian.Uint32(data[offset:]) + offset += 4 + if len(data) < offset+int(size) { + return dst, ErrInvalidStringTable + } + dst = append(dst, string(data[offset:offset+int(size)])) + offset += int(size) + } + return dst, nil } diff --git a/pkg/experiment/metastore/index/store/partition.go b/pkg/experiment/metastore/index/store/partition.go new file mode 100644 index 0000000000..60232aca54 --- /dev/null +++ b/pkg/experiment/metastore/index/store/partition.go @@ -0,0 +1,93 @@ +package store + +import ( + "encoding/binary" + "errors" + "time" +) + +var ErrInvalidPartitionKey = errors.New("invalid partition key") + +type Partition struct { + Key PartitionKey + TenantShards map[string]map[uint32]struct{} +} + +type PartitionKey struct { + Timestamp time.Time + Duration time.Duration +} + +func NewPartition(k PartitionKey) *Partition { + return &Partition{ + Key: k, + TenantShards: make(map[string]map[uint32]struct{}), + } +} + +func (p *Partition) StartTime() time.Time { return p.Key.Timestamp } +func (p *Partition) EndTime() time.Time { return p.Key.Timestamp.Add(p.Key.Duration) } + +func (p *Partition) Overlaps(start, end time.Time) bool { + return start.Before(p.EndTime()) && !end.Before(p.StartTime()) +} + +func (p *Partition) AddTenantShard(tenant string, shard uint32) { + t := p.TenantShards[tenant] + if t == nil { + t = make(map[uint32]struct{}) + p.TenantShards[tenant] = t + } + t[shard] = struct{}{} +} + +func (p *Partition) HasTenant(t string) bool { + _, ok := p.TenantShards[t] + return ok +} + +func (p *Partition) Compare(other *Partition) int { + if p == other { + return 0 + } + return p.Key.Timestamp.Compare(other.Key.Timestamp) +} + +func NewPartitionKey(timestamp time.Time, duration time.Duration) PartitionKey { + return PartitionKey{Timestamp: timestamp.Truncate(duration), Duration: duration} +} + +func (k PartitionKey) Equal(x PartitionKey) bool { + return k.Timestamp.Equal(x.Timestamp) && k.Duration == x.Duration +} + +func (k PartitionKey) MarshalBinary() ([]byte, error) { + b := make([]byte, 12) + binary.BigEndian.PutUint64(b[0:8], uint64(k.Timestamp.UnixNano())) + binary.BigEndian.PutUint32(b[8:12], uint32(k.Duration/time.Second)) + return b, nil +} + +func (k *PartitionKey) UnmarshalBinary(b []byte) error { + if len(b) != 12 { + return ErrInvalidPartitionKey + } + k.Timestamp = time.Unix(0, int64(binary.BigEndian.Uint64(b[0:8]))) + k.Duration = time.Duration(binary.BigEndian.Uint32(b[8:12])) * time.Second + return nil +} + +func (k PartitionKey) Bytes() []byte { + b, _ := k.MarshalBinary() + return b +} + +func (k PartitionKey) String() string { + b := make([]byte, 0, 32) + b = k.Timestamp.UTC().AppendFormat(b, time.DateTime) + b = append(b, ' ') + b = append(b, '(') + b = append(b, k.Duration.String()...) 
+ b = append(b, ')') + return string(b) +} diff --git a/pkg/experiment/metastore/index/store/partition_key.go b/pkg/experiment/metastore/index/store/partition_key.go deleted file mode 100644 index 7f23f7a26a..0000000000 --- a/pkg/experiment/metastore/index/store/partition_key.go +++ /dev/null @@ -1,65 +0,0 @@ -package store - -import ( - "fmt" - "strings" - "time" - - "github.com/oklog/ulid" - "github.com/prometheus/common/model" -) - -const ( - dayLayout = "20060102" - hourLayout = "20060102T15" -) - -func getTimeLayout(d model.Duration) string { - if time.Duration(d) >= 24*time.Hour { - return dayLayout - } else { - return hourLayout - } -} - -type PartitionKey string - -// CreatePartitionKey creates a partition key for a block. It is meant to be used for newly inserted blocks, as it relies -// on the index's currently configured partition duration to create the key. -// -// Note: Using this for existing blocks following a partition duration change can produce the wrong key. Callers should -// verify that the returned partition actually contains the block. -func CreatePartitionKey(blockId string, dur time.Duration) PartitionKey { - t := ulid.Time(ulid.MustParse(blockId).Time()).UTC() - - var b strings.Builder - b.Grow(16) - - year, month, day := t.Date() - b.WriteString(fmt.Sprintf("%04d%02d%02d", year, month, day)) - - partitionDuration := dur - if partitionDuration < 24*time.Hour { - hour := (t.Hour() / int(partitionDuration.Hours())) * int(partitionDuration.Hours()) - b.WriteString(fmt.Sprintf("T%02d", hour)) - } - - mDuration := model.Duration(partitionDuration) - b.WriteString(".") - b.WriteString(mDuration.String()) - - return PartitionKey(b.String()) -} - -func (k PartitionKey) Parse() (t time.Time, d time.Duration, err error) { - parts := strings.Split(string(k), ".") - if len(parts) != 2 { - return time.Time{}, 0, fmt.Errorf("invalid partition key: %s", k) - } - mDur, err := model.ParseDuration(parts[1]) - if err != nil { - return time.Time{}, 0, fmt.Errorf("invalid duration in partition key: %s", k) - } - t, err = time.Parse(getTimeLayout(mDur), parts[0]) - return t, time.Duration(mDur), err -} diff --git a/pkg/experiment/metastore/index/store/partition_key_test.go b/pkg/experiment/metastore/index/store/partition_key_test.go deleted file mode 100644 index de9f2462ae..0000000000 --- a/pkg/experiment/metastore/index/store/partition_key_test.go +++ /dev/null @@ -1,67 +0,0 @@ -package store - -import ( - "testing" - "time" - - "github.com/stretchr/testify/assert" - - "github.com/grafana/pyroscope/pkg/test" -) - -func TestIndex_GetPartitionKey(t *testing.T) { - tests := []struct { - name string - duration time.Duration - blockId string - want PartitionKey - }{ - { - name: "1d", - duration: test.Duration("1d"), - blockId: test.ULID("2024-07-15T16:13:43.245Z"), - want: PartitionKey("20240715.1d"), - }, - { - name: "1h at start of the window", - duration: test.Duration("1h"), - blockId: test.ULID("2024-07-15T16:00:00.000Z"), - want: PartitionKey("20240715T16.1h"), - }, - { - name: "1h in the middle of the window", - duration: test.Duration("1h"), - blockId: test.ULID("2024-07-15T16:13:43.245Z"), - want: PartitionKey("20240715T16.1h"), - }, - { - name: "1h at the end of the window", - duration: test.Duration("1h"), - blockId: test.ULID("2024-07-15T16:59:59.999Z"), - want: PartitionKey("20240715T16.1h"), - }, - { - name: "6h duration at midnight", - duration: test.Duration("6h"), - blockId: test.ULID("2024-07-15T00:00:00.000Z"), - want: PartitionKey("20240715T00.6h"), - }, - { - name: 
"6h at the middle of a window", - duration: test.Duration("6h"), - blockId: test.ULID("2024-07-15T15:13:43.245Z"), - want: PartitionKey("20240715T12.6h"), - }, - { - name: "6h at the end of the window", - duration: test.Duration("6h"), - blockId: test.ULID("2024-07-15T23:59:59.999Z"), - want: PartitionKey("20240715T18.6h"), - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - assert.Equalf(t, tt.want, CreatePartitionKey(tt.blockId, tt.duration), "CreatePartitionKey(%v)", tt.blockId) - }) - } -} diff --git a/pkg/experiment/metastore/index/store/partition_test.go b/pkg/experiment/metastore/index/store/partition_test.go new file mode 100644 index 0000000000..0c077ad0f0 --- /dev/null +++ b/pkg/experiment/metastore/index/store/partition_test.go @@ -0,0 +1,122 @@ +package store + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + + "github.com/grafana/pyroscope/pkg/test" +) + +func TestPartition_Overlaps(t *testing.T) { + type args struct { + start time.Time + end time.Time + } + tests := []struct { + name string + key PartitionKey + args args + want bool + }{ + { + name: "simple overlap", + key: NewPartitionKey(test.Time("2024-09-11T06:00:00.000Z"), 6*time.Hour), + args: args{ + start: test.Time("2024-09-11T07:15:24.123Z"), + end: test.Time("2024-09-11T13:15:24.123Z"), + }, + want: true, + }, + { + name: "overlap at partition start", + key: NewPartitionKey(test.Time("2024-09-11T06:00:00.000Z"), 6*time.Hour), + args: args{ + start: test.Time("2024-09-11T04:00:00.000Z"), + end: test.Time("2024-09-11T06:00:00.000Z"), + }, + want: true, + }, + { + name: "no overlap close to partition start", + key: NewPartitionKey(test.Time("2024-09-11T06:00:00.000Z"), 6*time.Hour), + args: args{ + start: test.Time("2024-09-11T04:00:00.000Z"), + end: test.Time("2024-09-11T05:59:59.999Z"), + }, + want: false, + }, + { + name: "overlap at partition end", + key: NewPartitionKey(test.Time("2024-09-11T06:00:00.000Z"), 6*time.Hour), + args: args{ + start: test.Time("2024-09-11T11:59:59.999Z"), + end: test.Time("2024-09-11T13:00:00.000Z"), + }, + want: true, + }, + { + name: "no overlap close to partition end", + key: NewPartitionKey(test.Time("2024-09-11T06:00:00.000Z"), 6*time.Hour), + args: args{ + start: test.Time("2024-09-11T12:00:00.000Z"), + end: test.Time("2024-09-11T13:59:59.999Z"), + }, + want: false, + }, + { + name: "overlap around midnight", + key: NewPartitionKey(test.Time("2024-09-11T00:00:00.000Z"), 6*time.Hour), + args: args{ + start: test.Time("2024-09-10T19:00:00.000Z"), + end: test.Time("2024-09-11T00:01:01.999Z"), + }, + want: true, + }, + { + name: "partition fully contains interval", + key: NewPartitionKey(test.Time("2024-09-11T06:00:00.000Z"), 6*time.Hour), + args: args{ + start: test.Time("2024-09-11T07:00:00.000Z"), + end: test.Time("2024-09-11T08:01:01.999Z"), + }, + want: true, + }, + { + name: "interval fully contains partition", + key: NewPartitionKey(test.Time("2024-09-11T06:00:00.000Z"), 6*time.Hour), + args: args{ + start: test.Time("2024-09-11T02:00:00.000Z"), + end: test.Time("2024-09-11T13:01:01.999Z"), + }, + want: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p := NewPartition(tt.key) + assert.Equalf(t, tt.want, p.Overlaps(tt.args.start, tt.args.end), "overlaps(%v, %v)", tt.args.start, tt.args.end) + }) + } +} + +func TestPartitionKey_Equal(t *testing.T) { + k1 := NewPartitionKey(test.Time("2024-09-11T02:00:00.000Z"), 2*time.Hour) + k2 := NewPartitionKey(test.Time("2024-09-11T03:01:01.999Z"), 
2*time.Hour) + assert.Equal(t, k1, k2) + + k1 = NewPartitionKey(test.Time("2024-09-11T02:00:00.000Z"), time.Hour) + k2 = NewPartitionKey(test.Time("2024-09-11T03:01:01.999Z"), time.Hour) + assert.NotEqual(t, k1, k2) +} + +func TestPartitionKey_Encoding(t *testing.T) { + k := NewPartitionKey(time.Now(), time.Hour*6) + b, err := k.MarshalBinary() + assert.NoError(t, err) + var d PartitionKey + err = d.UnmarshalBinary(b) + assert.NoError(t, err) + assert.Equal(t, k, d) +} diff --git a/pkg/experiment/metastore/index_raft_handler.go b/pkg/experiment/metastore/index_raft_handler.go index aa76d9dd69..e837dbb6d2 100644 --- a/pkg/experiment/metastore/index_raft_handler.go +++ b/pkg/experiment/metastore/index_raft_handler.go @@ -8,6 +8,7 @@ import ( "go.etcd.io/bbolt" metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" + "github.com/grafana/pyroscope/pkg/experiment/metastore/compaction" "github.com/grafana/pyroscope/pkg/experiment/metastore/index" ) @@ -16,25 +17,21 @@ type Index interface { } type Tombstones interface { - Exists(*metastorev1.BlockMeta) bool -} - -type Compactor interface { - Compact(*bbolt.Tx, *raft.Log, *metastorev1.BlockMeta) error + Exists(tenant string, shard uint32, block string) bool } type IndexCommandHandler struct { logger log.Logger index Index tombstones Tombstones - compactor Compactor + compactor compaction.Compactor } func NewIndexCommandHandler( logger log.Logger, index Index, tombstones Tombstones, - compactor Compactor, + compactor compaction.Compactor, ) *IndexCommandHandler { return &IndexCommandHandler{ logger: logger, @@ -45,21 +42,22 @@ func NewIndexCommandHandler( } func (m *IndexCommandHandler) AddBlock(tx *bbolt.Tx, cmd *raft.Log, req *metastorev1.AddBlockRequest) (*metastorev1.AddBlockResponse, error) { - if m.tombstones.Exists(req.Block) { - level.Warn(m.logger).Log("msg", "block already added and compacted", "block_id", req.Block.Id) + e := compaction.NewBlockEntry(cmd, req.Block) + if m.tombstones.Exists(e.Tenant, e.Shard, e.ID) { + level.Warn(m.logger).Log("msg", "block already added and compacted", "block", e.ID) return new(metastorev1.AddBlockResponse), nil } if err := m.index.InsertBlock(tx, req.Block); err != nil { if errors.Is(err, index.ErrBlockExists) { - level.Warn(m.logger).Log("msg", "block already added", "block_id", req.Block.Id) + level.Warn(m.logger).Log("msg", "block already added", "block", e.ID) return new(metastorev1.AddBlockResponse), nil } - level.Error(m.logger).Log("msg", "failed to add block to index", "block_id", req.Block.Id) + level.Error(m.logger).Log("msg", "failed to add block to index", "block", e.ID, "err", err) return nil, err } - if err := m.compactor.Compact(tx, cmd, req.Block); err != nil { - level.Error(m.logger).Log("msg", "failed to add block to compaction", "block", req.Block.Id, "err", err) + if err := m.compactor.Compact(tx, e); err != nil { + level.Error(m.logger).Log("msg", "failed to add block to compaction", "block", e.ID, "err", err) return nil, err } - return &metastorev1.AddBlockResponse{}, nil + return new(metastorev1.AddBlockResponse), nil } diff --git a/pkg/experiment/metastore/index_service.go b/pkg/experiment/metastore/index_service.go index 6d42b66b0c..e97a64a943 100644 --- a/pkg/experiment/metastore/index_service.go +++ b/pkg/experiment/metastore/index_service.go @@ -5,11 +5,13 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/oklog/ulid" "go.etcd.io/bbolt" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" metastorev1 
"github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1/raft_log" + "github.com/grafana/pyroscope/pkg/experiment/block" placement "github.com/grafana/pyroscope/pkg/experiment/distributor/placement/adaptive_placement" "github.com/grafana/pyroscope/pkg/experiment/metastore/fsm" "github.com/grafana/pyroscope/pkg/experiment/metastore/raftnode" @@ -20,11 +22,15 @@ type PlacementStats interface { RecordStats(iter.Iterator[placement.Sample]) } +type IndexBlockFinder interface { + FindBlocks(*bbolt.Tx, *metastorev1.BlockList) ([]*metastorev1.BlockMeta, error) +} + func NewIndexService( logger log.Logger, raft Raft, state State, - index IndexQuerier, + index IndexBlockFinder, stats PlacementStats, ) *IndexService { return &IndexService{ @@ -42,14 +48,14 @@ type IndexService struct { logger log.Logger raft Raft state State - index IndexQuerier + index IndexBlockFinder stats PlacementStats } func (svc *IndexService) AddBlock( ctx context.Context, req *metastorev1.AddBlockRequest, -) (rsp *metastorev1.AddBlockResponse, err error) { +) (resp *metastorev1.AddBlockResponse, err error) { defer func() { if err == nil { svc.stats.RecordStats(statsFromMetadata(req.Block)) @@ -69,33 +75,36 @@ func (svc *IndexService) addBlockMetadata( _ context.Context, req *metastorev1.AddBlockRequest, ) (*metastorev1.AddBlockResponse, error) { - if err := SanitizeMetadata(req.Block); err != nil { - _ = level.Warn(svc.logger).Log("invalid metadata", "block_id", req.Block.Id, "err", err) + if err := block.SanitizeMetadata(req.Block); err != nil { + level.Warn(svc.logger).Log("invalid metadata", "block", req.Block.Id, "err", err) return nil, err } - if err := proposeAddBlockMetadata(svc.raft, req.Block); err != nil { - _ = level.Error(svc.logger).Log("msg", "failed to add block", "block_id", req.Block.Id, "err", err) + _, err := svc.raft.Propose( + fsm.RaftLogEntryType(raft_log.RaftCommand_RAFT_COMMAND_ADD_BLOCK_METADATA), + &raft_log.AddBlockMetadataRequest{Metadata: req.Block}, + ) + if err != nil { + level.Error(svc.logger).Log("msg", "failed to add block", "block", req.Block.Id, "err", err) return nil, err } return new(metastorev1.AddBlockResponse), nil } -func proposeAddBlockMetadata(raft Raft, md *metastorev1.BlockMeta) error { - _, err := raft.Propose( - fsm.RaftLogEntryType(raft_log.RaftCommand_RAFT_COMMAND_ADD_BLOCK_METADATA), - &raft_log.AddBlockMetadataRequest{Metadata: md}, - ) - return err -} - func (svc *IndexService) GetBlockMetadata( ctx context.Context, req *metastorev1.GetBlockMetadataRequest, -) (*metastorev1.GetBlockMetadataResponse, error) { - var found []*metastorev1.BlockMeta - err := svc.state.ConsistentRead(ctx, func(tx *bbolt.Tx, _ raftnode.ReadIndex) { - found = svc.index.FindBlocks(tx, req.GetBlocks()) - }) +) (resp *metastorev1.GetBlockMetadataResponse, err error) { + read := func(tx *bbolt.Tx, _ raftnode.ReadIndex) { + resp, err = svc.getBlockMetadata(tx, req.GetBlocks()) + } + if readErr := svc.state.ConsistentRead(ctx, read); readErr != nil { + return nil, status.Error(codes.Unavailable, readErr.Error()) + } + return resp, err +} + +func (svc *IndexService) getBlockMetadata(tx *bbolt.Tx, list *metastorev1.BlockList) (*metastorev1.GetBlockMetadataResponse, error) { + found, err := svc.index.FindBlocks(tx, list) if err != nil { return nil, err } @@ -125,17 +134,10 @@ func (s *sampleIterator) Next() bool { func (s *sampleIterator) At() placement.Sample { ds := s.md.Datasets[s.cur-1] return placement.Sample{ - TenantID: ds.TenantId, - 
DatasetName: ds.Name, - ShardOwner: s.md.CreatedBy, + TenantID: s.md.StringTable[ds.Tenant], + DatasetName: s.md.StringTable[ds.Name], + ShardOwner: s.md.StringTable[s.md.CreatedBy], ShardID: s.md.Shard, Size: ds.Size, } } - -// TODO(kolesnikovae): Implement and refactor to the block package. - -func SanitizeMetadata(md *metastorev1.BlockMeta) error { - _, err := ulid.Parse(md.Id) - return err -} diff --git a/pkg/experiment/metastore/query_service.go b/pkg/experiment/metastore/query_service.go index bf26940caa..cc9acdf240 100644 --- a/pkg/experiment/metastore/query_service.go +++ b/pkg/experiment/metastore/query_service.go @@ -2,14 +2,11 @@ package metastore import ( "context" - "fmt" - "slices" - "strings" + "errors" + "time" "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/promql/parser" "go.etcd.io/bbolt" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" @@ -17,13 +14,19 @@ import ( metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" "github.com/grafana/pyroscope/pkg/experiment/metastore/index" "github.com/grafana/pyroscope/pkg/experiment/metastore/raftnode" - "github.com/grafana/pyroscope/pkg/model" + "github.com/grafana/pyroscope/pkg/iter" ) type IndexQuerier interface { - FindBlocks(tx *bbolt.Tx, list *metastorev1.BlockList) []*metastorev1.BlockMeta - FindBlocksInRange(tx *bbolt.Tx, start, end int64, tenants map[string]struct{}) []*metastorev1.BlockMeta - ForEachPartition(ctx context.Context, f func(*index.PartitionMeta) error) error + QueryMetadata(*bbolt.Tx, index.MetadataQuery) iter.Iterator[*metastorev1.BlockMeta] +} + +type MetadataQueryService struct { + metastorev1.MetadataQueryServiceServer + + logger log.Logger + state State + index IndexQuerier } func NewMetadataQueryService( @@ -38,25 +41,12 @@ func NewMetadataQueryService( } } -type MetadataQueryService struct { - metastorev1.MetadataQueryServiceServer - - logger log.Logger - state State - index IndexQuerier -} - func (svc *MetadataQueryService) QueryMetadata( ctx context.Context, req *metastorev1.QueryMetadataRequest, ) (resp *metastorev1.QueryMetadataResponse, err error) { read := func(tx *bbolt.Tx, _ raftnode.ReadIndex) { - // NOTE(kolesnikovae): that there's a little chance that we read - // applied changes not yet committed by the quorum, because we - // ignore the read index. This is fine in 99.(9)% of cases. - // In the future we should ensure isolation that ensure that we - // do not access the state beyond the read index. - resp, err = svc.listBlocksForQuery(ctx, tx, req) + resp, err = svc.queryMetadata(ctx, tx, req) } if readErr := svc.state.ConsistentRead(ctx, read); readErr != nil { return nil, status.Error(codes.Unavailable, readErr.Error()) @@ -64,108 +54,24 @@ func (svc *MetadataQueryService) QueryMetadata( return resp, err } -func (svc *MetadataQueryService) listBlocksForQuery( - _ context.Context, // TODO(kolesnikovae): Handle cancellation. 
+func (svc *MetadataQueryService) queryMetadata( + _ context.Context, tx *bbolt.Tx, req *metastorev1.QueryMetadataRequest, ) (*metastorev1.QueryMetadataResponse, error) { - q, err := newMetadataQuery(req) - if err != nil { - return nil, status.Error(codes.InvalidArgument, err.Error()) - } - var resp metastorev1.QueryMetadataResponse - md := make(map[string]*metastorev1.BlockMeta, 32) - - blocks := svc.index.FindBlocksInRange(tx, q.startTime, q.endTime, q.tenants) - if err != nil { - level.Error(svc.logger).Log("msg", "failed to list metastore blocks", "query", q, "err", err) - return nil, status.Error(codes.Internal, err.Error()) - } - for _, block := range blocks { - var clone *metastorev1.BlockMeta - for _, svc := range block.Datasets { - if q.matchService(svc) { - if clone == nil { - clone = cloneBlockForQuery(block) - md[clone.Id] = clone - } - clone.Datasets = append(clone.Datasets, svc) - } - } - } - - resp.Blocks = make([]*metastorev1.BlockMeta, 0, len(md)) - for _, block := range md { - resp.Blocks = append(resp.Blocks, block) - } - slices.SortFunc(resp.Blocks, func(a, b *metastorev1.BlockMeta) int { - return strings.Compare(a.Id, b.Id) - }) - return &resp, nil -} - -type metadataQuery struct { - startTime int64 - endTime int64 - tenants map[string]struct{} - serviceMatcher *labels.Matcher -} - -func (q *metadataQuery) String() string { - return fmt.Sprintf("start: %d, end: %d, tenants: %v, serviceMatcher: %v", q.startTime, q.endTime, q.tenants, q.serviceMatcher) -} - -func newMetadataQuery(request *metastorev1.QueryMetadataRequest) (*metadataQuery, error) { - if len(request.TenantId) == 0 { - return nil, fmt.Errorf("tenant_id is required") - } - q := &metadataQuery{ - startTime: request.StartTime, - endTime: request.EndTime, - tenants: make(map[string]struct{}, len(request.TenantId)), - } - for _, tenant := range request.TenantId { - q.tenants[tenant] = struct{}{} - } - selectors, err := parser.ParseMetricSelector(request.Query) - if err != nil { - return nil, fmt.Errorf("failed to parse label selectors: %w", err) + metas, err := iter.Slice(svc.index.QueryMetadata(tx, index.MetadataQuery{ + Expr: req.Query, + StartTime: time.UnixMilli(req.StartTime), + EndTime: time.UnixMilli(req.EndTime), + Tenant: req.TenantId, + })) + if err == nil { + return &metastorev1.QueryMetadataResponse{Blocks: metas}, nil } - for _, m := range selectors { - if m.Name == model.LabelNameServiceName { - q.serviceMatcher = m - break - } - } - // We could also validate that the service has the profile type - // queried, but that's not really necessary: querying an irrelevant - // profile type is rather a rare/invalid case. 
- return q, nil -} - -func (q *metadataQuery) matchService(s *metastorev1.Dataset) bool { - _, ok := q.tenants[s.TenantId] - if !ok { - return false - } - if !inRange(s.MinTime, s.MaxTime, q.startTime, q.endTime) { - return false - } - if q.serviceMatcher != nil { - return q.serviceMatcher.Matches(s.Name) + var invalid *index.InvalidQueryError + if errors.As(err, &invalid) { + return nil, status.Error(codes.InvalidArgument, err.Error()) } - return true -} - -func inRange(blockStart, blockEnd, queryStart, queryEnd int64) bool { - return blockStart <= queryEnd && blockEnd >= queryStart -} - -func cloneBlockForQuery(b *metastorev1.BlockMeta) *metastorev1.BlockMeta { - datasets := b.Datasets - b.Datasets = nil - c := b.CloneVT() - b.Datasets = datasets - c.Datasets = make([]*metastorev1.Dataset, 0, len(b.Datasets)) - return c + level.Error(svc.logger).Log("msg", "failed to query metadata", "err", err) + return nil, status.Error(codes.Internal, err.Error()) } diff --git a/pkg/experiment/metastore/tenant_service.go b/pkg/experiment/metastore/tenant_service.go index a57ab19e29..b471a8c6f8 100644 --- a/pkg/experiment/metastore/tenant_service.go +++ b/pkg/experiment/metastore/tenant_service.go @@ -2,8 +2,6 @@ package metastore import ( "context" - "math" - "sync" "github.com/go-kit/log" "go.etcd.io/bbolt" @@ -11,25 +9,25 @@ import ( "google.golang.org/grpc/status" metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" - "github.com/grafana/pyroscope/pkg/experiment/metastore/index" "github.com/grafana/pyroscope/pkg/experiment/metastore/raftnode" ) -// TODO(kolesnikovae): The service should not know -// about partitions and the index implementation. +type TenantIndex interface { + GetTenantStats(tenant string) *metastorev1.TenantStats +} type TenantService struct { metastorev1.TenantServiceServer logger log.Logger state State - index IndexQuerier + index TenantIndex } func NewTenantService( logger log.Logger, state State, - index IndexQuerier, + index TenantIndex, ) *TenantService { return &TenantService{ logger: logger, @@ -43,7 +41,7 @@ func (svc *TenantService) GetTenant( req *metastorev1.GetTenantRequest, ) (resp *metastorev1.GetTenantResponse, err error) { read := func(*bbolt.Tx, raftnode.ReadIndex) { - resp, err = svc.getTenantStats(req.TenantId, ctx) + resp = &metastorev1.GetTenantResponse{Stats: svc.index.GetTenantStats(req.TenantId)} } if readErr := svc.state.ConsistentRead(ctx, read); readErr != nil { return nil, status.Error(codes.Unavailable, readErr.Error()) @@ -51,36 +49,6 @@ func (svc *TenantService) GetTenant( return resp, err } -func (svc *TenantService) getTenantStats(tenant string, ctx context.Context) (*metastorev1.GetTenantResponse, error) { - var respMutex sync.Mutex - stats := &metastorev1.TenantStats{ - DataIngested: false, - OldestProfileTime: math.MaxInt64, - NewestProfileTime: math.MinInt64, - } - err := svc.index.ForEachPartition(ctx, func(p *index.PartitionMeta) error { - if !p.HasTenant(tenant) { - return nil - } - oldest := p.StartTime().UnixMilli() - newest := p.EndTime().UnixMilli() - respMutex.Lock() - defer respMutex.Unlock() - stats.DataIngested = true - if oldest < stats.OldestProfileTime { - stats.OldestProfileTime = oldest - } - if newest > stats.NewestProfileTime { - stats.NewestProfileTime = newest - } - return nil - }) - if err != nil { - return nil, err - } - return &metastorev1.GetTenantResponse{Stats: stats}, nil -} - func (svc *TenantService) DeleteTenant( context.Context, *metastorev1.DeleteTenantRequest, diff --git 
a/pkg/experiment/metastore/tombstones/tombstones.go b/pkg/experiment/metastore/tombstones/tombstones.go index dc03efe1c8..f260202c4a 100644 --- a/pkg/experiment/metastore/tombstones/tombstones.go +++ b/pkg/experiment/metastore/tombstones/tombstones.go @@ -47,10 +47,10 @@ func NewStore() *store.TombstoneStore { return store.NewTombstoneStore() } -func (x *Tombstones) Exists(md *metastorev1.BlockMeta) bool { - tenant, exists := x.blocks[tenantBlockKey{tenant: md.TenantId, shard: md.Shard}] +func (x *Tombstones) Exists(tenant string, shard uint32, block string) bool { + t, exists := x.blocks[tenantBlockKey{tenant: tenant, shard: shard}] if exists { - _, exists = tenant.blocks[md.Id] + _, exists = t.blocks[block] } return exists } diff --git a/pkg/experiment/query_backend/block_reader.go b/pkg/experiment/query_backend/block_reader.go index 1c93ccd9b6..059038ecf9 100644 --- a/pkg/experiment/query_backend/block_reader.go +++ b/pkg/experiment/query_backend/block_reader.go @@ -15,7 +15,7 @@ import ( "google.golang.org/grpc/status" queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1" - "github.com/grafana/pyroscope/pkg/experiment/query_backend/block" + "github.com/grafana/pyroscope/pkg/experiment/block" "github.com/grafana/pyroscope/pkg/objstore" "github.com/grafana/pyroscope/pkg/util" ) diff --git a/pkg/experiment/query_backend/query.go b/pkg/experiment/query_backend/query.go index 242037ca75..1243e9f31e 100644 --- a/pkg/experiment/query_backend/query.go +++ b/pkg/experiment/query_backend/query.go @@ -10,7 +10,7 @@ import ( "github.com/opentracing/opentracing-go" queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1" - "github.com/grafana/pyroscope/pkg/experiment/query_backend/block" + "github.com/grafana/pyroscope/pkg/experiment/block" ) // TODO(kolesnikovae): We have a procedural definition of our queries, diff --git a/pkg/experiment/query_backend/query_label_names.go b/pkg/experiment/query_backend/query_label_names.go index 08adc132bf..b40d5627d8 100644 --- a/pkg/experiment/query_backend/query_label_names.go +++ b/pkg/experiment/query_backend/query_label_names.go @@ -7,7 +7,7 @@ import ( "github.com/prometheus/prometheus/model/labels" queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1" - "github.com/grafana/pyroscope/pkg/experiment/query_backend/block" + "github.com/grafana/pyroscope/pkg/experiment/block" "github.com/grafana/pyroscope/pkg/model" "github.com/grafana/pyroscope/pkg/phlaredb" ) diff --git a/pkg/experiment/query_backend/query_label_values.go b/pkg/experiment/query_backend/query_label_values.go index 7fe5569314..23834118d1 100644 --- a/pkg/experiment/query_backend/query_label_values.go +++ b/pkg/experiment/query_backend/query_label_values.go @@ -9,7 +9,7 @@ import ( "github.com/prometheus/prometheus/storage" queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1" - "github.com/grafana/pyroscope/pkg/experiment/query_backend/block" + "github.com/grafana/pyroscope/pkg/experiment/block" "github.com/grafana/pyroscope/pkg/model" "github.com/grafana/pyroscope/pkg/phlaredb" ) diff --git a/pkg/experiment/query_backend/query_plan/query_plan.go b/pkg/experiment/query_backend/query_plan/query_plan.go index e2d5749017..7368791cd2 100644 --- a/pkg/experiment/query_backend/query_plan/query_plan.go +++ b/pkg/experiment/query_backend/query_plan/query_plan.go @@ -88,8 +88,8 @@ func printPlan(w io.Writer, pad string, n *queryv1.QueryNode, debug bool) { } case queryv1.QueryNode_READ: - for _, block := range n.Blocks { - _, _ = fmt.Fprintf(w, 
pad+"\t"+"id:\"%s\"\n", block.Id) + for _, md := range n.Blocks { + _, _ = fmt.Fprintf(w, pad+"\t"+"id:\"%s\"\n", md.Id) } default: diff --git a/pkg/experiment/query_backend/query_pprof.go b/pkg/experiment/query_backend/query_pprof.go index b2cd1aae99..efc9fd4f0b 100644 --- a/pkg/experiment/query_backend/query_pprof.go +++ b/pkg/experiment/query_backend/query_pprof.go @@ -6,7 +6,7 @@ import ( "github.com/grafana/dskit/runutil" queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1" - "github.com/grafana/pyroscope/pkg/experiment/query_backend/block" + "github.com/grafana/pyroscope/pkg/experiment/block" parquetquery "github.com/grafana/pyroscope/pkg/phlaredb/query" v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" "github.com/grafana/pyroscope/pkg/phlaredb/symdb" diff --git a/pkg/experiment/query_backend/query_series_labels.go b/pkg/experiment/query_backend/query_series_labels.go index 647c4b3cc4..c3f1735279 100644 --- a/pkg/experiment/query_backend/query_series_labels.go +++ b/pkg/experiment/query_backend/query_series_labels.go @@ -7,7 +7,7 @@ import ( queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1" typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" - "github.com/grafana/pyroscope/pkg/experiment/query_backend/block" + "github.com/grafana/pyroscope/pkg/experiment/block" "github.com/grafana/pyroscope/pkg/model" "github.com/grafana/pyroscope/pkg/phlaredb" "github.com/grafana/pyroscope/pkg/phlaredb/tsdb/index" diff --git a/pkg/experiment/query_backend/query_time_series.go b/pkg/experiment/query_backend/query_time_series.go index 8f8a85cb61..3a9a2be362 100644 --- a/pkg/experiment/query_backend/query_time_series.go +++ b/pkg/experiment/query_backend/query_time_series.go @@ -9,7 +9,7 @@ import ( queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1" typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" - "github.com/grafana/pyroscope/pkg/experiment/query_backend/block" + "github.com/grafana/pyroscope/pkg/experiment/block" phlaremodel "github.com/grafana/pyroscope/pkg/model" parquetquery "github.com/grafana/pyroscope/pkg/phlaredb/query" schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" diff --git a/pkg/experiment/query_backend/query_tree.go b/pkg/experiment/query_backend/query_tree.go index 5a0cd71f49..21ad773290 100644 --- a/pkg/experiment/query_backend/query_tree.go +++ b/pkg/experiment/query_backend/query_tree.go @@ -6,7 +6,7 @@ import ( "github.com/grafana/dskit/runutil" queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1" - "github.com/grafana/pyroscope/pkg/experiment/query_backend/block" + "github.com/grafana/pyroscope/pkg/experiment/block" "github.com/grafana/pyroscope/pkg/model" parquetquery "github.com/grafana/pyroscope/pkg/phlaredb/query" v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" diff --git a/pkg/frontend/read_path/query_frontend/compat.go b/pkg/frontend/read_path/query_frontend/compat.go index 4066147391..7f152326e3 100644 --- a/pkg/frontend/read_path/query_frontend/compat.go +++ b/pkg/frontend/read_path/query_frontend/compat.go @@ -76,7 +76,7 @@ func listProfileTypesFromMetadata( startTime int64, endTime int64, ) (*ptypes, error) { - md, err := client.QueryMetadata(ctx, &metastorev1.QueryMetadataRequest{ + resp, err := client.QueryMetadata(ctx, &metastorev1.QueryMetadataRequest{ TenantId: tenants, StartTime: startTime, EndTime: endTime, @@ -85,10 +85,18 @@ func listProfileTypesFromMetadata( if err != nil { return nil, err } - p := newProfileTypesResponseBuilder(len(md.Blocks) * 
8) - for _, m := range md.Blocks { - for _, s := range m.Datasets { - p.addServiceProfileTypes(s.Name, s.ProfileTypes...) + p := newProfileTypesResponseBuilder(len(resp.Blocks) * 8) + for _, md := range resp.Blocks { + for _, ds := range md.Datasets { + s := md.StringTable[ds.Name] + sp, ok := p.services[s] + if !ok { + sp = make(map[string]struct{}, len(ds.ProfileTypes)) + p.services[s] = sp + } + for _, t := range ds.ProfileTypes { + sp[md.StringTable[t]] = struct{}{} + } } } return p, nil @@ -161,17 +169,6 @@ func newProfileTypesResponseBuilder(size int) *ptypes { return &ptypes{services: make(map[string]map[string]struct{}, size)} } -func (p *ptypes) addServiceProfileTypes(s string, types ...string) { - sp, ok := p.services[s] - if !ok { - sp = make(map[string]struct{}, len(types)) - p.services[s] = sp - } - for _, t := range types { - sp[t] = struct{}{} - } -} - func (p *ptypes) buildSeriesLabels(names []string) (labels []*typesv1.Labels) { switch len(names) { case 2: diff --git a/pkg/frontend/read_path/query_frontend/query_profile_types.go b/pkg/frontend/read_path/query_frontend/query_profile_types.go index 8dc866b612..381c0b7a1c 100644 --- a/pkg/frontend/read_path/query_frontend/query_profile_types.go +++ b/pkg/frontend/read_path/query_frontend/query_profile_types.go @@ -31,7 +31,7 @@ func (q *QueryFrontend) ProfileTypes( return connect.NewResponse(&querierv1.ProfileTypesResponse{}), nil } - md, err := q.metadataQueryClient.QueryMetadata(ctx, &metastorev1.QueryMetadataRequest{ + resp, err := q.metadataQueryClient.QueryMetadata(ctx, &metastorev1.QueryMetadataRequest{ TenantId: tenants, StartTime: req.Msg.Start, EndTime: req.Msg.End, @@ -43,15 +43,16 @@ func (q *QueryFrontend) ProfileTypes( } pTypesFromMetadata := make(map[string]*typesv1.ProfileType) - for _, b := range md.Blocks { - for _, d := range b.Datasets { - for _, pType := range d.ProfileTypes { - if _, ok := pTypesFromMetadata[pType]; !ok { - profileType, err := phlaremodel.ParseProfileTypeSelector(pType) + for _, md := range resp.Blocks { + for _, ds := range md.Datasets { + for _, p := range ds.ProfileTypes { + t := md.StringTable[p] + if _, ok := pTypesFromMetadata[t]; !ok { + profileType, err := phlaremodel.ParseProfileTypeSelector(t) if err != nil { return nil, err } - pTypesFromMetadata[pType] = profileType + pTypesFromMetadata[t] = profileType } } } diff --git a/pkg/frontend/read_path/query_frontend/query_profile_types_test.go b/pkg/frontend/read_path/query_frontend/query_profile_types_test.go index 9498961706..cc27e7d7a7 100644 --- a/pkg/frontend/read_path/query_frontend/query_profile_types_test.go +++ b/pkg/frontend/read_path/query_frontend/query_profile_types_test.go @@ -29,29 +29,25 @@ func TestQueryFrontend_ProfileTypes(t *testing.T) { Blocks: []*metastorev1.BlockMeta{ { Datasets: []*metastorev1.Dataset{ - { - ProfileTypes: []string{ - "memory:inuse_space:bytes:space:byte", - "process_cpu:cpu:nanoseconds:cpu:nanoseconds", - "mutex:delay:nanoseconds:mutex:count", - }, - }, - { - ProfileTypes: []string{ - "memory:alloc_in_new_tlab_objects:count:space:bytes", - "process_cpu:cpu:nanoseconds:cpu:nanoseconds", - }, - }, + {ProfileTypes: []int32{1, 2, 3}}, + {ProfileTypes: []int32{4, 2}}, + }, + StringTable: []string{ + "", + "memory:inuse_space:bytes:space:byte", + "process_cpu:cpu:nanoseconds:cpu:nanoseconds", + "mutex:delay:nanoseconds:mutex:count", + "memory:alloc_in_new_tlab_objects:count:space:bytes", }, }, { Datasets: []*metastorev1.Dataset{ - { - ProfileTypes: []string{ - "mutex:contentions:count:mutex:count", 
- "mutex:delay:nanoseconds:mutex:count", - }, - }, + {ProfileTypes: []int32{1, 2}}, + }, + StringTable: []string{ + "", + "mutex:contentions:count:mutex:count", + "mutex:delay:nanoseconds:mutex:count", }, }, }, diff --git a/pkg/test/mocks/mockcompactor/mock_block_queue_store.go b/pkg/test/mocks/mockcompactor/mock_block_queue_store.go index 91fd024a5e..b61b9c3119 100644 --- a/pkg/test/mocks/mockcompactor/mock_block_queue_store.go +++ b/pkg/test/mocks/mockcompactor/mock_block_queue_store.go @@ -3,13 +3,12 @@ package mockcompactor import ( - bbolt "go.etcd.io/bbolt" + "go.etcd.io/bbolt" - iter "github.com/grafana/pyroscope/pkg/iter" + "github.com/grafana/pyroscope/pkg/experiment/metastore/compaction" + "github.com/grafana/pyroscope/pkg/iter" - mock "github.com/stretchr/testify/mock" - - store "github.com/grafana/pyroscope/pkg/experiment/metastore/compaction/compactor/store" + "github.com/stretchr/testify/mock" ) // MockBlockQueueStore is an autogenerated mock type for the BlockQueueStore type @@ -120,19 +119,19 @@ func (_c *MockBlockQueueStore_DeleteEntry_Call) RunAndReturn(run func(*bbolt.Tx, } // ListEntries provides a mock function with given fields: _a0 -func (_m *MockBlockQueueStore) ListEntries(_a0 *bbolt.Tx) iter.Iterator[store.BlockEntry] { +func (_m *MockBlockQueueStore) ListEntries(_a0 *bbolt.Tx) iter.Iterator[compaction.BlockEntry] { ret := _m.Called(_a0) if len(ret) == 0 { panic("no return value specified for ListEntries") } - var r0 iter.Iterator[store.BlockEntry] - if rf, ok := ret.Get(0).(func(*bbolt.Tx) iter.Iterator[store.BlockEntry]); ok { + var r0 iter.Iterator[compaction.BlockEntry] + if rf, ok := ret.Get(0).(func(*bbolt.Tx) iter.Iterator[compaction.BlockEntry]); ok { r0 = rf(_a0) } else { if ret.Get(0) != nil { - r0 = ret.Get(0).(iter.Iterator[store.BlockEntry]) + r0 = ret.Get(0).(iter.Iterator[compaction.BlockEntry]) } } @@ -157,18 +156,18 @@ func (_c *MockBlockQueueStore_ListEntries_Call) Run(run func(_a0 *bbolt.Tx)) *Mo return _c } -func (_c *MockBlockQueueStore_ListEntries_Call) Return(_a0 iter.Iterator[store.BlockEntry]) *MockBlockQueueStore_ListEntries_Call { +func (_c *MockBlockQueueStore_ListEntries_Call) Return(_a0 iter.Iterator[compaction.BlockEntry]) *MockBlockQueueStore_ListEntries_Call { _c.Call.Return(_a0) return _c } -func (_c *MockBlockQueueStore_ListEntries_Call) RunAndReturn(run func(*bbolt.Tx) iter.Iterator[store.BlockEntry]) *MockBlockQueueStore_ListEntries_Call { +func (_c *MockBlockQueueStore_ListEntries_Call) RunAndReturn(run func(*bbolt.Tx) iter.Iterator[compaction.BlockEntry]) *MockBlockQueueStore_ListEntries_Call { _c.Call.Return(run) return _c } // StoreEntry provides a mock function with given fields: _a0, _a1 -func (_m *MockBlockQueueStore) StoreEntry(_a0 *bbolt.Tx, _a1 store.BlockEntry) error { +func (_m *MockBlockQueueStore) StoreEntry(_a0 *bbolt.Tx, _a1 compaction.BlockEntry) error { ret := _m.Called(_a0, _a1) if len(ret) == 0 { @@ -176,7 +175,7 @@ func (_m *MockBlockQueueStore) StoreEntry(_a0 *bbolt.Tx, _a1 store.BlockEntry) e } var r0 error - if rf, ok := ret.Get(0).(func(*bbolt.Tx, store.BlockEntry) error); ok { + if rf, ok := ret.Get(0).(func(*bbolt.Tx, compaction.BlockEntry) error); ok { r0 = rf(_a0, _a1) } else { r0 = ret.Error(0) @@ -197,9 +196,9 @@ func (_e *MockBlockQueueStore_Expecter) StoreEntry(_a0 interface{}, _a1 interfac return &MockBlockQueueStore_StoreEntry_Call{Call: _e.mock.On("StoreEntry", _a0, _a1)} } -func (_c *MockBlockQueueStore_StoreEntry_Call) Run(run func(_a0 *bbolt.Tx, _a1 store.BlockEntry)) 
*MockBlockQueueStore_StoreEntry_Call { +func (_c *MockBlockQueueStore_StoreEntry_Call) Run(run func(_a0 *bbolt.Tx, _a1 compaction.BlockEntry)) *MockBlockQueueStore_StoreEntry_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*bbolt.Tx), args[1].(store.BlockEntry)) + run(args[0].(*bbolt.Tx), args[1].(compaction.BlockEntry)) }) return _c } @@ -209,7 +208,7 @@ func (_c *MockBlockQueueStore_StoreEntry_Call) Return(_a0 error) *MockBlockQueue return _c } -func (_c *MockBlockQueueStore_StoreEntry_Call) RunAndReturn(run func(*bbolt.Tx, store.BlockEntry) error) *MockBlockQueueStore_StoreEntry_Call { +func (_c *MockBlockQueueStore_StoreEntry_Call) RunAndReturn(run func(*bbolt.Tx, compaction.BlockEntry) error) *MockBlockQueueStore_StoreEntry_Call { _c.Call.Return(run) return _c } diff --git a/pkg/test/time.go b/pkg/test/time.go index 89bb8e66f6..2bce040f52 100644 --- a/pkg/test/time.go +++ b/pkg/test/time.go @@ -14,11 +14,16 @@ func ULID(t string) string { return l.String() } -func Time(t string) int64 { +func UnixMilli(t string) int64 { ts, _ := time.Parse(time.RFC3339, t) return ts.UnixMilli() } +func Time(t string) time.Time { + x, _ := time.Parse(time.RFC3339, t) + return x +} + func Duration(d string) time.Duration { parsed, _ := model.ParseDuration(d) return time.Duration(parsed)
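Illustrative sketch (not part of the patch): the ".strings" sub-bucket written by StoreBlock holds length-prefixed batches of newly imported strings — a big-endian uint32 count, then for each string a big-endian uint32 length followed by the raw bytes, keyed by the starting index in the tenant/shard string table. A minimal standalone round-trip of that on-disk format, mirroring encodeStrings/decodeStrings above (names here are local to the example), could look like this:

package main

import (
	"encoding/binary"
	"errors"
	"fmt"
)

var errMalformed = errors.New("malformed string table")

// encode writes: uint32 count, then (uint32 length + bytes) per string, big-endian.
func encode(strs []string) []byte {
	size := 4
	for _, s := range strs {
		size += 4 + len(s)
	}
	data := make([]byte, 0, size)
	data = binary.BigEndian.AppendUint32(data, uint32(len(strs)))
	for _, s := range strs {
		data = binary.BigEndian.AppendUint32(data, uint32(len(s)))
		data = append(data, s...)
	}
	return data
}

// decode reverses encode, validating bounds at every step.
func decode(data []byte) ([]string, error) {
	if len(data) < 4 {
		return nil, errMalformed
	}
	n := binary.BigEndian.Uint32(data)
	offset := 4
	out := make([]string, 0, n)
	for i := uint32(0); i < n; i++ {
		if len(data) < offset+4 {
			return nil, errMalformed
		}
		size := int(binary.BigEndian.Uint32(data[offset:]))
		offset += 4
		if len(data) < offset+size {
			return nil, errMalformed
		}
		out = append(out, string(data[offset:offset+size]))
		offset += size
	}
	return out, nil
}

func main() {
	b := encode([]string{"", "ingester", "tenant-a", "dataset-a"})
	s, err := decode(b)
	fmt.Println(s, err) // [ ingester tenant-a dataset-a] <nil>
}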
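Illustrative sketch (not part of the patch): partition bucket keys move from formatted strings ("20240715T16.1h") to a fixed 12-byte big-endian encoding — 8 bytes of the truncated start timestamp in Unix nanoseconds plus 4 bytes of the partition duration in seconds — which keeps bbolt's byte-ordered iteration in chronological order. A local round-trip mirroring PartitionKey.MarshalBinary/UnmarshalBinary (type and function names here are local to the example):

package main

import (
	"encoding/binary"
	"fmt"
	"time"
)

type key struct {
	ts  time.Time
	dur time.Duration
}

// newKey truncates the timestamp to the partition duration, as NewPartitionKey does.
func newKey(t time.Time, d time.Duration) key {
	return key{ts: t.Truncate(d), dur: d}
}

// marshal produces the 12-byte big-endian key: 8 bytes unix-nanos, 4 bytes seconds.
func (k key) marshal() []byte {
	b := make([]byte, 12)
	binary.BigEndian.PutUint64(b[0:8], uint64(k.ts.UnixNano()))
	binary.BigEndian.PutUint32(b[8:12], uint32(k.dur/time.Second))
	return b
}

func unmarshal(b []byte) key {
	return key{
		ts:  time.Unix(0, int64(binary.BigEndian.Uint64(b[0:8]))),
		dur: time.Duration(binary.BigEndian.Uint32(b[8:12])) * time.Second,
	}
}

func main() {
	k := newKey(time.Now().UTC(), 6*time.Hour)
	got := unmarshal(k.marshal())
	fmt.Println(got.ts.Equal(k.ts), got.dur == k.dur) // true true
}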
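Illustrative sketch (not part of the patch): after this change, Dataset.Tenant, Dataset.Name and Dataset.ProfileTypes are int32 indices into BlockMeta.StringTable rather than strings, so readers resolve them with a lookup, as the query frontend code above does inline. The helper below is hypothetical and only demonstrates the lookup convention (index 0 is the empty string by convention):

package main

import (
	"fmt"

	metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1"
)

// datasetProfileTypes resolves a dataset's profile type indices against the
// block's string table. Hypothetical helper, for illustration only.
func datasetProfileTypes(md *metastorev1.BlockMeta, ds *metastorev1.Dataset) []string {
	types := make([]string, 0, len(ds.ProfileTypes))
	for _, i := range ds.ProfileTypes {
		types = append(types, md.StringTable[i])
	}
	return types
}

func main() {
	md := &metastorev1.BlockMeta{
		StringTable: []string{"", "tenant-a", "dataset-a", "cpu:samples:count:cpu:nanoseconds"},
		Datasets: []*metastorev1.Dataset{
			{Tenant: 1, Name: 2, ProfileTypes: []int32{3}},
		},
	}
	ds := md.Datasets[0]
	fmt.Println(md.StringTable[ds.Tenant], md.StringTable[ds.Name], datasetProfileTypes(md, ds))
	// Output: tenant-a dataset-a [cpu:samples:count:cpu:nanoseconds]
}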