diff --git a/common/item_sketch_float.go b/common/item_sketch_float.go new file mode 100644 index 0000000..eae77a9 --- /dev/null +++ b/common/item_sketch_float.go @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package common + +import ( + "encoding/binary" + "fmt" + "math" +) + +var ItemSketchFloatComparator = func(reverseOrder bool) CompareFn[float32] { + return func(a float32, b float32) bool { + if reverseOrder { + return a > b + } + return a < b + } +} + +// ItemSketchFloatSerDe handles serialization and deserialization of floating-point sketch items. 
// Every item is encoded as the 4-byte little-endian IEEE 754 bit pattern of a
// float32, so all sizes are fixed multiples of four bytes.
type ItemSketchFloatSerDe struct{}

// itemSketchFloatBytes is the encoded size of a single float32 item.
const itemSketchFloatBytes = 4

// SizeOf returns the number of bytes needed to serialize item, which is
// always 4 regardless of the value.
func (s ItemSketchFloatSerDe) SizeOf(item float32) int {
	return itemSketchFloatBytes
}

// SizeOfMany returns the number of bytes occupied by numItems serialized
// items. mem and offsetBytes are not inspected because the encoding is fixed
// width; the returned error is always nil.
func (s ItemSketchFloatSerDe) SizeOfMany(mem []byte, offsetBytes int, numItems int) (int, error) {
	return numItems * itemSketchFloatBytes, nil
}

// SerializeOneToSlice returns a new 4-byte slice holding item in
// little-endian order.
func (s ItemSketchFloatSerDe) SerializeOneToSlice(item float32) []byte {
	buf := make([]byte, itemSketchFloatBytes)
	binary.LittleEndian.PutUint32(buf, math.Float32bits(item))
	return buf
}

// SerializeManyToSlice returns a new slice holding every element of items
// back to back, 4 bytes each. An empty input yields an empty, non-nil slice.
func (s ItemSketchFloatSerDe) SerializeManyToSlice(items []float32) []byte {
	buf := make([]byte, itemSketchFloatBytes*len(items))
	for i, item := range items {
		binary.LittleEndian.PutUint32(buf[i*itemSketchFloatBytes:], math.Float32bits(item))
	}
	return buf
}

// DeserializeManyFromSlice decodes numItems consecutive float32 values from
// mem, starting at offsetBytes.
//
// A request for zero items returns an empty, non-nil slice without looking at
// mem. An error is returned, instead of panicking, when offsetBytes or
// numItems is negative or when mem does not hold numItems*4 bytes after
// offsetBytes.
func (s ItemSketchFloatSerDe) DeserializeManyFromSlice(mem []byte, offsetBytes int, numItems int) ([]float32, error) {
	if numItems == 0 {
		return []float32{}, nil
	}
	if numItems < 0 || offsetBytes < 0 {
		return nil, fmt.Errorf("float32 serde: invalid offset %d or item count %d", offsetBytes, numItems)
	}
	// Divide the remaining length instead of multiplying numItems so the
	// bounds check itself cannot overflow.
	if offsetBytes > len(mem) || numItems > (len(mem)-offsetBytes)/itemSketchFloatBytes {
		return nil, fmt.Errorf("float32 serde: cannot read %d items at offset %d from %d bytes",
			numItems, offsetBytes, len(mem))
	}

	src := mem[offsetBytes:]
	items := make([]float32, numItems)
	for i := range items {
		items[i] = math.Float32frombits(binary.LittleEndian.Uint32(src[i*itemSketchFloatBytes:]))
	}
	return items, nil
}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kll + +import ( + "fmt" + "os" + "strconv" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/apache/datasketches-go/common" + "github.com/apache/datasketches-go/internal" +) + +func TestGenerateGoFiles(t *testing.T) { + if len(os.Getenv(internal.DSketchTestGenerateGo)) == 0 { + t.Skipf("%s not set", internal.DSketchTestGenerateGo) + } + + os.MkdirAll(internal.GoPath, 0755) + + nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000} + comparatorString := common.ItemSketchStringComparator(false) + for _, n := range nArr { + digits := numDigits(n) + sk, err := NewKllItemsSketchWithDefault[string](comparatorString, common.ItemSketchStringSerDe{}) + sk.deterministicOffsetForTest = true + assert.NoError(t, err) + for i := 1; i <= n; i++ { + sk.Update(intToFixedLengthString(i, digits)) + } + slc, err := sk.ToSlice() + assert.NoError(t, err) + err = os.WriteFile(fmt.Sprintf("%s/kll_string_n%d_go.sk", internal.GoPath, n), slc, 0644) + assert.NoError(t, err) + } + + comparatorDouble := common.ItemSketchDoubleComparator(false) + for _, n := range nArr { + sk, err := NewKllItemsSketchWithDefault[float64](comparatorDouble, common.ItemSketchDoubleSerDe{}) + sk.deterministicOffsetForTest = true + assert.NoError(t, err) + for i := 1; i <= n; i++ { + sk.Update(float64(i)) + } + slc, err := sk.ToSlice() + assert.NoError(t, err) + err = os.WriteFile(fmt.Sprintf("%s/kll_double_n%d_go.sk", internal.GoPath, n), slc, 0644) + assert.NoError(t, err) + } + + comparatorLong := common.ItemSketchLongComparator(false) + for _, n := range nArr { 
+ sk, err := NewKllItemsSketchWithDefault[int64](comparatorLong, common.ItemSketchLongSerDe{}) + sk.deterministicOffsetForTest = true + assert.NoError(t, err) + for i := 1; i <= n; i++ { + sk.Update(int64(i)) + } + slc, err := sk.ToSlice() + assert.NoError(t, err) + err = os.WriteFile(fmt.Sprintf("%s/kll_long_n%d_go.sk", internal.GoPath, n), slc, 0644) + assert.NoError(t, err) + } + + comparatorFloat := common.ItemSketchFloatComparator(false) + for _, n := range nArr { + sk, err := NewKllItemsSketchWithDefault[float32](comparatorFloat, common.ItemSketchFloatSerDe{}) + sk.deterministicOffsetForTest = true + assert.NoError(t, err) + for i := 1; i <= n; i++ { + sk.Update(float32(i)) + } + slc, err := sk.ToSlice() + assert.NoError(t, err) + err = os.WriteFile(fmt.Sprintf("%s/kll_float_n%d_go.sk", internal.GoPath, n), slc, 0644) + assert.NoError(t, err) + } +} + +func TestJavaCompat(t *testing.T) { + t.Run("Java KLL String", func(t *testing.T) { + nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000} + serde := common.ItemSketchStringSerDe{} + comparatorString := common.ItemSketchStringComparator(false) + for _, n := range nArr { + digits := numDigits(n) + filename := fmt.Sprintf("%s/kll_string_n%d_java.sk", internal.JavaPath, n) + // Skip if file doesn't exist + if _, err := os.Stat(filename); os.IsNotExist(err) { + t.Skipf("Java file not found: %s", filename) + return + } + bytes, err := os.ReadFile(filename) + assert.NoError(t, err) + sketch, err := NewKllItemsSketchFromSlice[string](bytes, comparatorString, serde) + if err != nil { + return + } + + assert.Equal(t, sketch.GetK(), uint16(200)) + if n == 0 { + assert.True(t, sketch.IsEmpty()) + } else { + assert.False(t, sketch.IsEmpty()) + } + + if n > 100 { + assert.True(t, sketch.IsEstimationMode()) + } else { + assert.False(t, sketch.IsEstimationMode()) + } + + if n > 0 { + minV, err := sketch.GetMinItem() + assert.NoError(t, err) + assert.Equal(t, minV, intToFixedLengthString(1, digits)) + + maxV, err := 
sketch.GetMaxItem() + assert.NoError(t, err) + assert.Equal(t, maxV, intToFixedLengthString(n, digits)) + + weight := int64(0) + it := sketch.GetIterator() + compareFn := comparatorString + for it.Next() { + qut := it.GetQuantile() + assert.True(t, compareFn(minV, qut) || minV == qut, fmt.Sprintf("min: \"%v\" \"%v\"", minV, qut)) + assert.True(t, !compareFn(maxV, qut) || maxV == qut, fmt.Sprintf("max: \"%v\" \"%v\"", maxV, qut)) + weight += it.GetWeight() + } + assert.Equal(t, weight, int64(n)) + } + } + }) + + t.Run("Java KLL Double", func(t *testing.T) { + nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000} + serde := common.ItemSketchDoubleSerDe{} + comparatorDouble := common.ItemSketchDoubleComparator(false) + for _, n := range nArr { + filename := fmt.Sprintf("%s/kll_double_n%d_java.sk", internal.JavaPath, n) + // Skip if file doesn't exist + if _, err := os.Stat(filename); os.IsNotExist(err) { + t.Skipf("Java file not found: %s", filename) + return + } + bytes, err := os.ReadFile(filename) + assert.NoError(t, err) + sketch, err := NewKllItemsSketchFromSlice[float64](bytes, comparatorDouble, serde) + if err != nil { + return + } + + assert.Equal(t, sketch.GetK(), uint16(200)) + if n == 0 { + assert.True(t, sketch.IsEmpty()) + } else { + assert.False(t, sketch.IsEmpty()) + } + + if n > 100 { + assert.True(t, sketch.IsEstimationMode()) + } else { + assert.False(t, sketch.IsEstimationMode()) + } + + if n > 0 { + minV, err := sketch.GetMinItem() + assert.NoError(t, err) + assert.Equal(t, minV, float64(1)) + + maxV, err := sketch.GetMaxItem() + assert.NoError(t, err) + assert.Equal(t, maxV, float64(n)) + + weight := int64(0) + it := sketch.GetIterator() + for it.Next() { + qut := it.GetQuantile() + assert.True(t, comparatorDouble(minV, qut) || minV == qut, fmt.Sprintf("min: \"%v\" \"%v\"", minV, qut)) + assert.True(t, !comparatorDouble(maxV, qut) || maxV == qut, fmt.Sprintf("max: \"%v\" \"%v\"", maxV, qut)) + weight += it.GetWeight() + } + assert.Equal(t, 
weight, int64(n)) + } + } + }) + + t.Run("Java KLL Long", func(t *testing.T) { + nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000} + serde := common.ItemSketchLongSerDe{} + comparatorLong := common.ItemSketchLongComparator(false) + for _, n := range nArr { + filename := fmt.Sprintf("%s/kll_long_n%d_java.sk", internal.JavaPath, n) + // Skip if file doesn't exist + if _, err := os.Stat(filename); os.IsNotExist(err) { + t.Skipf("Java file not found: %s", filename) + return + } + bytes, err := os.ReadFile(filename) + assert.NoError(t, err) + sketch, err := NewKllItemsSketchFromSlice[int64](bytes, comparatorLong, serde) + if err != nil { + return + } + + assert.Equal(t, sketch.GetK(), uint16(200)) + if n == 0 { + assert.True(t, sketch.IsEmpty()) + } else { + assert.False(t, sketch.IsEmpty()) + } + + if n > 100 { + assert.True(t, sketch.IsEstimationMode()) + } else { + assert.False(t, sketch.IsEstimationMode()) + } + + if n > 0 { + minV, err := sketch.GetMinItem() + assert.NoError(t, err) + assert.Equal(t, minV, int64(1)) + + maxV, err := sketch.GetMaxItem() + assert.NoError(t, err) + assert.Equal(t, maxV, int64(n)) + + weight := int64(0) + it := sketch.GetIterator() + for it.Next() { + qut := it.GetQuantile() + assert.True(t, comparatorLong(minV, qut) || minV == qut, fmt.Sprintf("min: \"%v\" \"%v\"", minV, qut)) + assert.True(t, !comparatorLong(maxV, qut) || maxV == qut, fmt.Sprintf("max: \"%v\" \"%v\"", maxV, qut)) + weight += it.GetWeight() + } + assert.Equal(t, weight, int64(n)) + } + } + }) + + t.Run("Java KLL Float", func(t *testing.T) { + nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000} + serde := common.ItemSketchFloatSerDe{} + comparatorFloat := common.ItemSketchFloatComparator(false) + for _, n := range nArr { + filename := fmt.Sprintf("%s/kll_float_n%d_java.sk", internal.JavaPath, n) + // Skip if file doesn't exist + if _, err := os.Stat(filename); os.IsNotExist(err) { + t.Skipf("Java file not found: %s", filename) + return + } + bytes, err 
:= os.ReadFile(filename) + assert.NoError(t, err) + sketch, err := NewKllItemsSketchFromSlice[float32](bytes, comparatorFloat, serde) + if err != nil { + return + } + + assert.Equal(t, sketch.GetK(), uint16(200)) + if n == 0 { + assert.True(t, sketch.IsEmpty()) + } else { + assert.False(t, sketch.IsEmpty()) + } + + if n > 100 { + assert.True(t, sketch.IsEstimationMode()) + } else { + assert.False(t, sketch.IsEstimationMode()) + } + + if n > 0 { + minV, err := sketch.GetMinItem() + assert.NoError(t, err) + assert.Equal(t, minV, float32(1)) + + maxV, err := sketch.GetMaxItem() + assert.NoError(t, err) + assert.Equal(t, maxV, float32(n)) + + weight := int64(0) + it := sketch.GetIterator() + for it.Next() { + qut := it.GetQuantile() + assert.True(t, comparatorFloat(minV, qut) || minV == qut, fmt.Sprintf("min: \"%v\" \"%v\"", minV, qut)) + assert.True(t, !comparatorFloat(maxV, qut) || maxV == qut, fmt.Sprintf("max: \"%v\" \"%v\"", maxV, qut)) + weight += it.GetWeight() + } + assert.Equal(t, weight, int64(n)) + } + } + }) +} + +func TestCPPCompat(t *testing.T) { + // Note: CPP KLL String sketches use unpadded strings (e.g., "1", "10", "100") + // with numeric ordering, unlike Java/Go which use space-padded fixed-length strings + // with lexicographic ordering. 
+ t.Run("CPP KLL String", func(t *testing.T) { + nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000} + serde := common.ItemSketchStringSerDe{} + comparatorString := common.ItemSketchStringComparator(false) + for _, n := range nArr { + filename := fmt.Sprintf("%s/kll_string_n%d_cpp.sk", internal.CppPath, n) + // Skip if file doesn't exist + if _, err := os.Stat(filename); os.IsNotExist(err) { + t.Skipf("C++ file not found: %s", filename) + return + } + bytes, err := os.ReadFile(filename) + assert.NoError(t, err) + sketch, err := NewKllItemsSketchFromSlice[string](bytes, comparatorString, serde) + if err != nil { + return + } + + assert.Equal(t, sketch.GetK(), uint16(200)) + if n == 0 { + assert.True(t, sketch.IsEmpty()) + } else { + assert.False(t, sketch.IsEmpty()) + } + + if n > 100 { + assert.True(t, sketch.IsEstimationMode()) + } else { + assert.False(t, sketch.IsEstimationMode()) + } + + if n > 0 { + minV, err := sketch.GetMinItem() + assert.NoError(t, err) + assert.Equal(t, minV, strconv.Itoa(1)) + + maxV, err := sketch.GetMaxItem() + assert.NoError(t, err) + assert.Equal(t, maxV, strconv.Itoa(n)) + + weight := int64(0) + it := sketch.GetIterator() + // CPP sketches use numeric ordering for strings, so use a numeric comparator for bounds checks + numericLess := func(a, b string) bool { + ai, _ := strconv.Atoi(a) + bi, _ := strconv.Atoi(b) + return ai < bi + } + for it.Next() { + qut := it.GetQuantile() + assert.True(t, numericLess(minV, qut) || minV == qut, fmt.Sprintf("min: \"%v\" \"%v\"", minV, qut)) + assert.True(t, !numericLess(maxV, qut) || maxV == qut, fmt.Sprintf("max: \"%v\" \"%v\"", maxV, qut)) + weight += it.GetWeight() + } + assert.Equal(t, weight, int64(n)) + } + } + }) + + t.Run("CPP KLL Double", func(t *testing.T) { + nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000} + serde := common.ItemSketchDoubleSerDe{} + comparatorDouble := common.ItemSketchDoubleComparator(false) + for _, n := range nArr { + filename := 
fmt.Sprintf("%s/kll_double_n%d_cpp.sk", internal.CppPath, n) + // Skip if file doesn't exist + if _, err := os.Stat(filename); os.IsNotExist(err) { + t.Skipf("C++ file not found: %s", filename) + return + } + bytes, err := os.ReadFile(filename) + assert.NoError(t, err) + sketch, err := NewKllItemsSketchFromSlice[float64](bytes, comparatorDouble, serde) + if err != nil { + return + } + + assert.Equal(t, sketch.GetK(), uint16(200)) + if n == 0 { + assert.True(t, sketch.IsEmpty()) + } else { + assert.False(t, sketch.IsEmpty()) + } + + if n > 100 { + assert.True(t, sketch.IsEstimationMode()) + } else { + assert.False(t, sketch.IsEstimationMode()) + } + + if n > 0 { + minV, err := sketch.GetMinItem() + assert.NoError(t, err) + assert.Equal(t, minV, float64(1)) + + maxV, err := sketch.GetMaxItem() + assert.NoError(t, err) + assert.Equal(t, maxV, float64(n)) + + weight := int64(0) + it := sketch.GetIterator() + for it.Next() { + qut := it.GetQuantile() + assert.True(t, comparatorDouble(minV, qut) || minV == qut, fmt.Sprintf("min: \"%v\" \"%v\"", minV, qut)) + assert.True(t, !comparatorDouble(maxV, qut) || maxV == qut, fmt.Sprintf("max: \"%v\" \"%v\"", maxV, qut)) + weight += it.GetWeight() + } + assert.Equal(t, weight, int64(n)) + } + } + }) + + t.Run("CPP KLL Long", func(t *testing.T) { + nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000} + serde := common.ItemSketchLongSerDe{} + comparatorLong := common.ItemSketchLongComparator(false) + for _, n := range nArr { + filename := fmt.Sprintf("%s/kll_long_n%d_cpp.sk", internal.CppPath, n) + // Skip if file doesn't exist + if _, err := os.Stat(filename); os.IsNotExist(err) { + t.Skipf("C++ file not found: %s", filename) + return + } + bytes, err := os.ReadFile(filename) + assert.NoError(t, err) + sketch, err := NewKllItemsSketchFromSlice[int64](bytes, comparatorLong, serde) + if err != nil { + return + } + + assert.Equal(t, sketch.GetK(), uint16(200)) + if n == 0 { + assert.True(t, sketch.IsEmpty()) + } else { + 
assert.False(t, sketch.IsEmpty()) + } + + if n > 100 { + assert.True(t, sketch.IsEstimationMode()) + } else { + assert.False(t, sketch.IsEstimationMode()) + } + + if n > 0 { + minV, err := sketch.GetMinItem() + assert.NoError(t, err) + assert.Equal(t, minV, int64(1)) + + maxV, err := sketch.GetMaxItem() + assert.NoError(t, err) + assert.Equal(t, maxV, int64(n)) + + weight := int64(0) + it := sketch.GetIterator() + for it.Next() { + qut := it.GetQuantile() + assert.True(t, comparatorLong(minV, qut) || minV == qut, fmt.Sprintf("min: \"%v\" \"%v\"", minV, qut)) + assert.True(t, !comparatorLong(maxV, qut) || maxV == qut, fmt.Sprintf("max: \"%v\" \"%v\"", maxV, qut)) + weight += it.GetWeight() + } + assert.Equal(t, weight, int64(n)) + } + } + }) + + t.Run("CPP KLL Float", func(t *testing.T) { + nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000} + serde := common.ItemSketchFloatSerDe{} + comparatorFloat := common.ItemSketchFloatComparator(false) + for _, n := range nArr { + filename := fmt.Sprintf("%s/kll_float_n%d_cpp.sk", internal.CppPath, n) + // Skip if file doesn't exist + if _, err := os.Stat(filename); os.IsNotExist(err) { + t.Skipf("C++ file not found: %s", filename) + return + } + bytes, err := os.ReadFile(filename) + assert.NoError(t, err) + sketch, err := NewKllItemsSketchFromSlice[float32](bytes, comparatorFloat, serde) + if err != nil { + return + } + + assert.Equal(t, sketch.GetK(), uint16(200)) + if n == 0 { + assert.True(t, sketch.IsEmpty()) + } else { + assert.False(t, sketch.IsEmpty()) + } + + if n > 100 { + assert.True(t, sketch.IsEstimationMode()) + } else { + assert.False(t, sketch.IsEstimationMode()) + } + + if n > 0 { + minV, err := sketch.GetMinItem() + assert.NoError(t, err) + assert.Equal(t, minV, float32(1)) + + maxV, err := sketch.GetMaxItem() + assert.NoError(t, err) + assert.Equal(t, maxV, float32(n)) + + weight := int64(0) + it := sketch.GetIterator() + for it.Next() { + qut := it.GetQuantile() + assert.True(t, comparatorFloat(minV, 
qut) || minV == qut, fmt.Sprintf("min: \"%v\" \"%v\"", minV, qut)) + assert.True(t, !comparatorFloat(maxV, qut) || maxV == qut, fmt.Sprintf("max: \"%v\" \"%v\"", maxV, qut)) + weight += it.GetWeight() + } + assert.Equal(t, weight, int64(n)) + } + } + }) +} diff --git a/kll/items_sletch_serialization_test.go b/kll/items_sletch_serialization_test.go deleted file mode 100644 index e380066..0000000 --- a/kll/items_sletch_serialization_test.go +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package kll - -import ( - "fmt" - "os" - "testing" - - "github.com/stretchr/testify/assert" - - "github.com/apache/datasketches-go/common" - "github.com/apache/datasketches-go/internal" -) - -func TestGenerateGoFiles(t *testing.T) { - if len(os.Getenv(internal.DSketchTestGenerateGo)) == 0 { - t.Skipf("%s not set", internal.DSketchTestGenerateGo) - } - - os.MkdirAll(internal.GoPath, 0755) - - nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000} - comparatorString := common.ItemSketchStringComparator(false) - for _, n := range nArr { - digits := numDigits(n) - sk, err := NewKllItemsSketchWithDefault[string](comparatorString, common.ItemSketchStringSerDe{}) - sk.deterministicOffsetForTest = true - assert.NoError(t, err) - for i := 1; i <= n; i++ { - sk.Update(intToFixedLengthString(i, digits)) - } - slc, err := sk.ToSlice() - assert.NoError(t, err) - err = os.WriteFile(fmt.Sprintf("%s/kll_string_n%d_go.sk", internal.GoPath, n), slc, 0644) - assert.NoError(t, err) - } - - comparatorDouble := common.ItemSketchDoubleComparator(false) - for _, n := range nArr { - sk, err := NewKllItemsSketchWithDefault[float64](comparatorDouble, common.ItemSketchDoubleSerDe{}) - sk.deterministicOffsetForTest = true - assert.NoError(t, err) - for i := 1; i <= n; i++ { - sk.Update(float64(i)) - } - slc, err := sk.ToSlice() - assert.NoError(t, err) - err = os.WriteFile(fmt.Sprintf("%s/kll_double_n%d_go.sk", internal.GoPath, n), slc, 0644) - assert.NoError(t, err) - } - - comparatorLong := common.ItemSketchLongComparator(false) - for _, n := range nArr { - sk, err := NewKllItemsSketchWithDefault[int64](comparatorLong, common.ItemSketchLongSerDe{}) - sk.deterministicOffsetForTest = true - assert.NoError(t, err) - for i := 1; i <= n; i++ { - sk.Update(int64(i)) - } - slc, err := sk.ToSlice() - assert.NoError(t, err) - err = os.WriteFile(fmt.Sprintf("%s/kll_long_n%d_go.sk", internal.GoPath, n), slc, 0644) - assert.NoError(t, err) - } -} - -func TestJavaCompat(t *testing.T) { - 
t.Run("Java KLL String", func(t *testing.T) { - nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000} - serde := common.ItemSketchStringSerDe{} - comparatorString := common.ItemSketchStringComparator(false) - for _, n := range nArr { - digits := numDigits(n) - filename := fmt.Sprintf("%s/kll_string_n%d_java.sk", internal.JavaPath, n) - // Skip if file doesn't exist - if _, err := os.Stat(filename); os.IsNotExist(err) { - t.Skipf("Java file not found: %s", filename) - return - } - bytes, err := os.ReadFile(filename) - assert.NoError(t, err) - sketch, err := NewKllItemsSketchFromSlice[string](bytes, comparatorString, serde) - if err != nil { - return - } - - assert.Equal(t, sketch.GetK(), uint16(200)) - if n == 0 { - assert.True(t, sketch.IsEmpty()) - } else { - assert.False(t, sketch.IsEmpty()) - } - - if n > 100 { - assert.True(t, sketch.IsEstimationMode()) - } else { - assert.False(t, sketch.IsEstimationMode()) - } - - if n > 0 { - minV, err := sketch.GetMinItem() - assert.NoError(t, err) - assert.Equal(t, minV, intToFixedLengthString(1, digits)) - - maxV, err := sketch.GetMaxItem() - assert.NoError(t, err) - assert.Equal(t, maxV, intToFixedLengthString(n, digits)) - - weight := int64(0) - it := sketch.GetIterator() - compareFn := comparatorString - for it.Next() { - qut := it.GetQuantile() - assert.True(t, compareFn(minV, qut) || minV == qut, fmt.Sprintf("min: \"%v\" \"%v\"", minV, qut)) - assert.True(t, !compareFn(maxV, qut) || maxV == qut, fmt.Sprintf("max: \"%v\" \"%v\"", maxV, qut)) - weight += it.GetWeight() - } - assert.Equal(t, weight, int64(n)) - } - } - }) - - t.Run("Java KLL Double", func(t *testing.T) { - nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000} - serde := common.ItemSketchDoubleSerDe{} - comparatorDouble := common.ItemSketchDoubleComparator(false) - for _, n := range nArr { - filename := fmt.Sprintf("%s/kll_double_n%d_java.sk", internal.JavaPath, n) - // Skip if file doesn't exist - if _, err := os.Stat(filename); 
os.IsNotExist(err) { - t.Skipf("Java file not found: %s", filename) - return - } - bytes, err := os.ReadFile(filename) - assert.NoError(t, err) - sketch, err := NewKllItemsSketchFromSlice[float64](bytes, comparatorDouble, serde) - if err != nil { - return - } - - assert.Equal(t, sketch.GetK(), uint16(200)) - if n == 0 { - assert.True(t, sketch.IsEmpty()) - } else { - assert.False(t, sketch.IsEmpty()) - } - - if n > 100 { - assert.True(t, sketch.IsEstimationMode()) - } else { - assert.False(t, sketch.IsEstimationMode()) - } - - if n > 0 { - minV, err := sketch.GetMinItem() - assert.NoError(t, err) - assert.Equal(t, minV, float64(1)) - - maxV, err := sketch.GetMaxItem() - assert.NoError(t, err) - assert.Equal(t, maxV, float64(n)) - - weight := int64(0) - it := sketch.GetIterator() - for it.Next() { - qut := it.GetQuantile() - assert.True(t, comparatorDouble(minV, qut) || minV == qut, fmt.Sprintf("min: \"%v\" \"%v\"", minV, qut)) - assert.True(t, !comparatorDouble(maxV, qut) || maxV == qut, fmt.Sprintf("max: \"%v\" \"%v\"", maxV, qut)) - weight += it.GetWeight() - } - assert.Equal(t, weight, int64(n)) - } - } - }) - - t.Run("Java KLL Long", func(t *testing.T) { - nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000} - serde := common.ItemSketchLongSerDe{} - comparatorLong := common.ItemSketchLongComparator(false) - for _, n := range nArr { - filename := fmt.Sprintf("%s/kll_long_n%d_java.sk", internal.JavaPath, n) - // Skip if file doesn't exist - if _, err := os.Stat(filename); os.IsNotExist(err) { - t.Skipf("Java file not found: %s", filename) - return - } - bytes, err := os.ReadFile(filename) - assert.NoError(t, err) - sketch, err := NewKllItemsSketchFromSlice[int64](bytes, comparatorLong, serde) - if err != nil { - return - } - - assert.Equal(t, sketch.GetK(), uint16(200)) - if n == 0 { - assert.True(t, sketch.IsEmpty()) - } else { - assert.False(t, sketch.IsEmpty()) - } - - if n > 100 { - assert.True(t, sketch.IsEstimationMode()) - } else { - assert.False(t, 
sketch.IsEstimationMode()) - } - - if n > 0 { - minV, err := sketch.GetMinItem() - assert.NoError(t, err) - assert.Equal(t, minV, int64(1)) - - maxV, err := sketch.GetMaxItem() - assert.NoError(t, err) - assert.Equal(t, maxV, int64(n)) - - weight := int64(0) - it := sketch.GetIterator() - for it.Next() { - qut := it.GetQuantile() - assert.True(t, comparatorLong(minV, qut) || minV == qut, fmt.Sprintf("min: \"%v\" \"%v\"", minV, qut)) - assert.True(t, !comparatorLong(maxV, qut) || maxV == qut, fmt.Sprintf("max: \"%v\" \"%v\"", maxV, qut)) - weight += it.GetWeight() - } - assert.Equal(t, weight, int64(n)) - } - } - }) -} diff --git a/serialization_test_data/cpp_generated_files/kll_double_n1000000_cpp.sk b/serialization_test_data/cpp_generated_files/kll_double_n1000000_cpp.sk index 43015c2..d27acdb 100644 Binary files a/serialization_test_data/cpp_generated_files/kll_double_n1000000_cpp.sk and b/serialization_test_data/cpp_generated_files/kll_double_n1000000_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_double_n100000_cpp.sk b/serialization_test_data/cpp_generated_files/kll_double_n100000_cpp.sk index 560be1e..6dfe3a5 100644 Binary files a/serialization_test_data/cpp_generated_files/kll_double_n100000_cpp.sk and b/serialization_test_data/cpp_generated_files/kll_double_n100000_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_double_n10000_cpp.sk b/serialization_test_data/cpp_generated_files/kll_double_n10000_cpp.sk index a865f73..7577f7f 100644 Binary files a/serialization_test_data/cpp_generated_files/kll_double_n10000_cpp.sk and b/serialization_test_data/cpp_generated_files/kll_double_n10000_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_double_n1000_cpp.sk b/serialization_test_data/cpp_generated_files/kll_double_n1000_cpp.sk index a905b61..70b05f3 100644 Binary files a/serialization_test_data/cpp_generated_files/kll_double_n1000_cpp.sk and 
b/serialization_test_data/cpp_generated_files/kll_double_n1000_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_float_n0_cpp.sk b/serialization_test_data/cpp_generated_files/kll_float_n0_cpp.sk new file mode 100644 index 0000000..afd2209 Binary files /dev/null and b/serialization_test_data/cpp_generated_files/kll_float_n0_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_float_n1000000_cpp.sk b/serialization_test_data/cpp_generated_files/kll_float_n1000000_cpp.sk new file mode 100644 index 0000000..9ba0bac Binary files /dev/null and b/serialization_test_data/cpp_generated_files/kll_float_n1000000_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_float_n100000_cpp.sk b/serialization_test_data/cpp_generated_files/kll_float_n100000_cpp.sk new file mode 100644 index 0000000..2288d6a Binary files /dev/null and b/serialization_test_data/cpp_generated_files/kll_float_n100000_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_float_n10000_cpp.sk b/serialization_test_data/cpp_generated_files/kll_float_n10000_cpp.sk new file mode 100644 index 0000000..3826472 Binary files /dev/null and b/serialization_test_data/cpp_generated_files/kll_float_n10000_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_float_n1000_cpp.sk b/serialization_test_data/cpp_generated_files/kll_float_n1000_cpp.sk new file mode 100644 index 0000000..943fd91 Binary files /dev/null and b/serialization_test_data/cpp_generated_files/kll_float_n1000_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_float_n100_cpp.sk b/serialization_test_data/cpp_generated_files/kll_float_n100_cpp.sk new file mode 100644 index 0000000..ad45e64 Binary files /dev/null and b/serialization_test_data/cpp_generated_files/kll_float_n100_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_float_n10_cpp.sk 
b/serialization_test_data/cpp_generated_files/kll_float_n10_cpp.sk new file mode 100644 index 0000000..1fd87d3 Binary files /dev/null and b/serialization_test_data/cpp_generated_files/kll_float_n10_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_float_n1_cpp.sk b/serialization_test_data/cpp_generated_files/kll_float_n1_cpp.sk new file mode 100644 index 0000000..f7f9d22 Binary files /dev/null and b/serialization_test_data/cpp_generated_files/kll_float_n1_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_long_n0_cpp.sk b/serialization_test_data/cpp_generated_files/kll_long_n0_cpp.sk new file mode 100644 index 0000000..afd2209 Binary files /dev/null and b/serialization_test_data/cpp_generated_files/kll_long_n0_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_long_n1000000_cpp.sk b/serialization_test_data/cpp_generated_files/kll_long_n1000000_cpp.sk new file mode 100644 index 0000000..8b29906 Binary files /dev/null and b/serialization_test_data/cpp_generated_files/kll_long_n1000000_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_long_n100000_cpp.sk b/serialization_test_data/cpp_generated_files/kll_long_n100000_cpp.sk new file mode 100644 index 0000000..6915b54 Binary files /dev/null and b/serialization_test_data/cpp_generated_files/kll_long_n100000_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_long_n10000_cpp.sk b/serialization_test_data/cpp_generated_files/kll_long_n10000_cpp.sk new file mode 100644 index 0000000..0c77f1c Binary files /dev/null and b/serialization_test_data/cpp_generated_files/kll_long_n10000_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_long_n1000_cpp.sk b/serialization_test_data/cpp_generated_files/kll_long_n1000_cpp.sk new file mode 100644 index 0000000..7e6a582 Binary files /dev/null and b/serialization_test_data/cpp_generated_files/kll_long_n1000_cpp.sk differ diff --git 
a/serialization_test_data/cpp_generated_files/kll_long_n100_cpp.sk b/serialization_test_data/cpp_generated_files/kll_long_n100_cpp.sk new file mode 100644 index 0000000..43eb956 Binary files /dev/null and b/serialization_test_data/cpp_generated_files/kll_long_n100_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_long_n10_cpp.sk b/serialization_test_data/cpp_generated_files/kll_long_n10_cpp.sk new file mode 100644 index 0000000..f935b89 Binary files /dev/null and b/serialization_test_data/cpp_generated_files/kll_long_n10_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_long_n1_cpp.sk b/serialization_test_data/cpp_generated_files/kll_long_n1_cpp.sk new file mode 100644 index 0000000..317ce97 Binary files /dev/null and b/serialization_test_data/cpp_generated_files/kll_long_n1_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_string_n1000000_cpp.sk b/serialization_test_data/cpp_generated_files/kll_string_n1000000_cpp.sk index 0a286fd..1026c3d 100644 Binary files a/serialization_test_data/cpp_generated_files/kll_string_n1000000_cpp.sk and b/serialization_test_data/cpp_generated_files/kll_string_n1000000_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_string_n100000_cpp.sk b/serialization_test_data/cpp_generated_files/kll_string_n100000_cpp.sk index 5166b97..e543819 100644 Binary files a/serialization_test_data/cpp_generated_files/kll_string_n100000_cpp.sk and b/serialization_test_data/cpp_generated_files/kll_string_n100000_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_string_n10000_cpp.sk b/serialization_test_data/cpp_generated_files/kll_string_n10000_cpp.sk index 6384476..0317daf 100644 Binary files a/serialization_test_data/cpp_generated_files/kll_string_n10000_cpp.sk and b/serialization_test_data/cpp_generated_files/kll_string_n10000_cpp.sk differ diff --git a/serialization_test_data/cpp_generated_files/kll_string_n1000_cpp.sk 
b/serialization_test_data/cpp_generated_files/kll_string_n1000_cpp.sk index 76ad717..03f44e9 100644 Binary files a/serialization_test_data/cpp_generated_files/kll_string_n1000_cpp.sk and b/serialization_test_data/cpp_generated_files/kll_string_n1000_cpp.sk differ diff --git a/serialization_test_data/go_generated_files/kll_float_n0_go.sk b/serialization_test_data/go_generated_files/kll_float_n0_go.sk new file mode 100644 index 0000000..afd2209 Binary files /dev/null and b/serialization_test_data/go_generated_files/kll_float_n0_go.sk differ diff --git a/serialization_test_data/go_generated_files/kll_float_n1000000_go.sk b/serialization_test_data/go_generated_files/kll_float_n1000000_go.sk new file mode 100644 index 0000000..85260fa Binary files /dev/null and b/serialization_test_data/go_generated_files/kll_float_n1000000_go.sk differ diff --git a/serialization_test_data/go_generated_files/kll_float_n100000_go.sk b/serialization_test_data/go_generated_files/kll_float_n100000_go.sk new file mode 100644 index 0000000..fbafd72 Binary files /dev/null and b/serialization_test_data/go_generated_files/kll_float_n100000_go.sk differ diff --git a/serialization_test_data/go_generated_files/kll_float_n10000_go.sk b/serialization_test_data/go_generated_files/kll_float_n10000_go.sk new file mode 100644 index 0000000..7382f00 Binary files /dev/null and b/serialization_test_data/go_generated_files/kll_float_n10000_go.sk differ diff --git a/serialization_test_data/go_generated_files/kll_float_n1000_go.sk b/serialization_test_data/go_generated_files/kll_float_n1000_go.sk new file mode 100644 index 0000000..246299d Binary files /dev/null and b/serialization_test_data/go_generated_files/kll_float_n1000_go.sk differ diff --git a/serialization_test_data/go_generated_files/kll_float_n100_go.sk b/serialization_test_data/go_generated_files/kll_float_n100_go.sk new file mode 100644 index 0000000..ad45e64 Binary files /dev/null and 
b/serialization_test_data/go_generated_files/kll_float_n100_go.sk differ diff --git a/serialization_test_data/go_generated_files/kll_float_n10_go.sk b/serialization_test_data/go_generated_files/kll_float_n10_go.sk new file mode 100644 index 0000000..1fd87d3 Binary files /dev/null and b/serialization_test_data/go_generated_files/kll_float_n10_go.sk differ diff --git a/serialization_test_data/go_generated_files/kll_float_n1_go.sk b/serialization_test_data/go_generated_files/kll_float_n1_go.sk new file mode 100644 index 0000000..f7f9d22 Binary files /dev/null and b/serialization_test_data/go_generated_files/kll_float_n1_go.sk differ diff --git a/serialization_test_data/java_generated_files/kll_float_n0_java.sk b/serialization_test_data/java_generated_files/kll_float_n0_java.sk new file mode 100644 index 0000000..afd2209 Binary files /dev/null and b/serialization_test_data/java_generated_files/kll_float_n0_java.sk differ diff --git a/serialization_test_data/java_generated_files/kll_float_n1000000_java.sk b/serialization_test_data/java_generated_files/kll_float_n1000000_java.sk new file mode 100644 index 0000000..56ff576 Binary files /dev/null and b/serialization_test_data/java_generated_files/kll_float_n1000000_java.sk differ diff --git a/serialization_test_data/java_generated_files/kll_float_n100000_java.sk b/serialization_test_data/java_generated_files/kll_float_n100000_java.sk new file mode 100644 index 0000000..78b8bdf Binary files /dev/null and b/serialization_test_data/java_generated_files/kll_float_n100000_java.sk differ diff --git a/serialization_test_data/java_generated_files/kll_float_n10000_java.sk b/serialization_test_data/java_generated_files/kll_float_n10000_java.sk new file mode 100644 index 0000000..db00541 Binary files /dev/null and b/serialization_test_data/java_generated_files/kll_float_n10000_java.sk differ diff --git a/serialization_test_data/java_generated_files/kll_float_n1000_java.sk 
b/serialization_test_data/java_generated_files/kll_float_n1000_java.sk new file mode 100644 index 0000000..25e1d61 Binary files /dev/null and b/serialization_test_data/java_generated_files/kll_float_n1000_java.sk differ diff --git a/serialization_test_data/java_generated_files/kll_float_n100_java.sk b/serialization_test_data/java_generated_files/kll_float_n100_java.sk new file mode 100644 index 0000000..ad45e64 Binary files /dev/null and b/serialization_test_data/java_generated_files/kll_float_n100_java.sk differ diff --git a/serialization_test_data/java_generated_files/kll_float_n10_java.sk b/serialization_test_data/java_generated_files/kll_float_n10_java.sk new file mode 100644 index 0000000..1fd87d3 Binary files /dev/null and b/serialization_test_data/java_generated_files/kll_float_n10_java.sk differ diff --git a/serialization_test_data/java_generated_files/kll_float_n1_java.sk b/serialization_test_data/java_generated_files/kll_float_n1_java.sk new file mode 100644 index 0000000..f7f9d22 Binary files /dev/null and b/serialization_test_data/java_generated_files/kll_float_n1_java.sk differ diff --git a/serialization_test_data/java_generated_files/kll_string_n1000000_java.sk b/serialization_test_data/java_generated_files/kll_string_n1000000_java.sk index e5d0a29..ff8c83c 100644 Binary files a/serialization_test_data/java_generated_files/kll_string_n1000000_java.sk and b/serialization_test_data/java_generated_files/kll_string_n1000000_java.sk differ diff --git a/serialization_test_data/java_generated_files/kll_string_n100000_java.sk b/serialization_test_data/java_generated_files/kll_string_n100000_java.sk index e3382f2..0188134 100644 Binary files a/serialization_test_data/java_generated_files/kll_string_n100000_java.sk and b/serialization_test_data/java_generated_files/kll_string_n100000_java.sk differ diff --git a/serialization_test_data/java_generated_files/kll_string_n10000_java.sk b/serialization_test_data/java_generated_files/kll_string_n10000_java.sk index 
8c4ba2d..8399256 100644 Binary files a/serialization_test_data/java_generated_files/kll_string_n10000_java.sk and b/serialization_test_data/java_generated_files/kll_string_n10000_java.sk differ