From 6bb99a243a4f52c3e403ba6f290a4934e46ed3b3 Mon Sep 17 00:00:00 2001
From: Thanapat Chotipun
Date: Wed, 18 Dec 2024 15:55:39 +0700
Subject: [PATCH] feat: init cuid2

---
 .gitignore        |  15 +++
 LICENSE           |  21 +++++
 README.md         |  42 +++++++++
 benchmark_test.go |  28 ++++++
 collision_test.go | 222 +++++++++++++++++++++++++++++++++++++++++++++
 cuid2.go          | 227 ++++++++++++++++++++++++++++++++++++++++++++++
 cuid2_test.go     | 119 ++++++++++++++++++++++++
 go.mod            |   7 ++
 go.sum            |   4 +
 9 files changed, 685 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 LICENSE
 create mode 100644 README.md
 create mode 100644 benchmark_test.go
 create mode 100644 collision_test.go
 create mode 100644 cuid2.go
 create mode 100644 cuid2_test.go
 create mode 100644 go.mod
 create mode 100644 go.sum

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d1d5f20
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,15 @@
+# Binaries for programs and plugins
+*.exe
+*.exe~
+*.dll
+*.so
+*.dylib
+
+# Test binary, built with `go test -c`
+*.test
+
+# Output of the go coverage tool, specifically when used with LiteIDE
+*.out
+
+# Go workspace file
+go.work
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..729b044
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 PatrickChoDev.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
--- /dev/null
+++ b/README.md
@@ -0,0 +1,42 @@
+# CUID2
+
+Go implementation of [CUID2](https://github.com/paralleldrive/cuid2) - Collision-resistant ids optimized for horizontal scaling and performance.
+
+## Installation
+```bash
+go get github.com/PatrickChoDev/cuid2
+```
+
+## Usage
+```go
+import "github.com/PatrickChoDev/cuid2"
+
+// Generate a CUID with default settings
+id := cuid2.Generate() // Example: "tz4a98xxat96iws9zmbrgj3a"
+
+// Initialize with custom options
+generator, err := cuid2.Init(
+    cuid2.WithLength(10), // Custom length (2-32)
+    cuid2.WithRandomFunc(rand.Float64), // Custom random function
+    cuid2.WithFingerprint("myapp"), // Custom fingerprint
+)
+if err != nil {
+    // Handle error
+}
+id = generator() // Generate using custom settings
+
+// Validate a CUID
+valid := cuid2.IsCuid("tz4a98xxat96") // Returns true/false
+```
+
+## Features
+- Thread-safe ID generation
+- Configurable length (2-32 characters)
+- Customizable random function
+- System fingerprinting for distributed systems
+- Validation function
+- Single external dependency: golang.org/x/crypto (SHA-3)
+
+## License
+MIT License
+
diff --git a/benchmark_test.go b/benchmark_test.go
new file mode 100644
--- /dev/null
+++ b/benchmark_test.go
@@ -0,0 +1,28 @@
+package cuid2
+
+import "testing"
+
+// result receives the last generated id so that the benchmark loop has an
+// observable effect outside the function.
+var result string
+
+// benchmarkGenerate measures id generation for ids of the given length.
+func benchmarkGenerate(b *testing.B, length int) {
+	generate, err := Init(WithLength(length))
+	if err != nil {
+		b.Fatalf("Error: Could not initialise Cuid2 generator: %v", err)
+	}
+
+	var id string
+	b.ResetTimer() // exclude generator setup from the measurement
+	for n := 0; n < b.N; n++ {
+		id = generate()
+	}
+
+	result = id
+}
+
+func BenchmarkGenerate8(b *testing.B)  { benchmarkGenerate(b, 8) }
+func BenchmarkGenerate16(b *testing.B) { benchmarkGenerate(b, 16) }
+func BenchmarkGenerate24(b *testing.B) { benchmarkGenerate(b, 24) }
+func BenchmarkGenerate32(b *testing.B) { benchmarkGenerate(b, 32) }
diff --git a/collision_test.go b/collision_test.go
new file mode 100644
--- /dev/null
+++ b/collision_test.go
@@ -0,0 +1,222 @@
+package cuid2
+
+import (
+	"fmt"
+	"log"
+	"math"
+	"math/big"
+	"strconv"
+	"sync"
+	"testing"
+)
+
+// TestCollisions generates 7^8 * 2 ids across seven concurrent pools and
+// checks that they are unique, evenly distributed and well-formed.
+func TestCollisions(t *testing.T) {
+	n := int64(math.Pow(float64(7), float64(8)) * 2)
+	log.Printf("Testing %v unique Cuids...", n)
+
+	numPools := int64(7)
+	pools := createIdPools(numPools, n/numPools)
+
+	ids := []string{}
+	for _, pool := range pools {
+		ids = append(ids, pool.ids...)
+	}
+
+	sampleIds := ids[:10]
+	set := map[string]struct{}{}
+	for _, id := range ids {
+		set[id] = struct{}{}
+	}
+
+	histogram := pools[0].histogram
+
+	log.Println("Sample Cuids:", sampleIds)
+	log.Println("Histogram:", histogram)
+
+	expectedBinSize := math.Ceil(float64(n / numPools / int64(len(histogram))))
+	tolerance := 0.05
+	minBinSize := math.Round(expectedBinSize * (1 - tolerance))
+	maxBinSize := math.Round(expectedBinSize * (1 + tolerance))
+	log.Println("Expected bin size:", expectedBinSize)
+	log.Println("Min bin size:", minBinSize)
+	log.Println("Maximum bin size:", maxBinSize)
+
+	// A collision (inside one pool or across pools) leaves fewer unique ids
+	// than were requested, so the shortfall is n minus the set size.
+	collisionsDetected := n - int64(len(set))
+	if collisionsDetected > 0 {
+		t.Fatalf("%v collisions detected", collisionsDetected)
+	}
+
+	for _, binSize := range histogram {
+		withinDistributionTolerance := binSize > minBinSize && binSize < maxBinSize
+		if !withinDistributionTolerance {
+			t.Errorf("Histogram of generated Cuids is not within the distribution tolerance of %v", tolerance)
+			t.Fatalf("Expected bin size: %v, min: %v, max: %v, actual: %v", expectedBinSize, minBinSize, maxBinSize, binSize)
+		}
+	}
+
+	validateCuids(t, ids)
+}
+
+// IdPool holds the ids produced by one job together with their numeric
+// values and the histogram built from those values.
+type IdPool struct {
+	ids       []string
+	numbers   []big.Int
+	histogram []float64
+}
+
+// NewIdPool returns a job that generates up to max ids with the default
+// generator, stopping early at the first collision inside the pool.
+func NewIdPool(max int) func() *IdPool {
+	return func() *IdPool {
+		set := map[string]struct{}{}
+		collided := false
+
+		for i := 0; i < max; i++ {
+			set[Generate()] = struct{}{}
+			if i%100000 == 0 {
+				progress := float64(i) / float64(max)
+				log.Printf("%d%%", int64(progress*100))
+			}
+			// After i+1 insertions a collision-free set holds i+1 entries.
+			if len(set) < i+1 {
+				log.Printf("Collision at: %v", i)
+				collided = true
+				break
+			}
+		}
+
+		if !collided {
+			log.Println("No collisions detected")
+		}
+
+		ids := []string{}
+		numbers := []big.Int{}
+
+		for element := range set {
+			ids = append(ids, element)
+			// The first character is skipped; only the remaining characters
+			// are converted for the distribution check.
+			numbers = append(numbers, *idToBigInt(element[1:]))
+		}
+
+		return &IdPool{
+			ids:       ids,
+			numbers:   numbers,
+			histogram: buildHistogram(numbers, 20),
+		}
+	}
+}
+
+// idToBigInt interprets id as a base-36 number. A character that is not a
+// base-36 digit contributes zero, because the parse error is ignored.
+func idToBigInt(id string) *big.Int {
+	bigInt := new(big.Int)
+	for _, char := range id {
+		base36Rune, _ := strconv.ParseInt(string(char), 36, 64)
+		bigInt.Add(big.NewInt(base36Rune), bigInt.Mul(bigInt, big.NewInt(36)))
+	}
+	return bigInt
+}
+
+// buildHistogram splits the range [0, 36^(DefaultIdLength-1)) into
+// bucketCount equal buckets and counts how many numbers fall into each.
+func buildHistogram(numbers []big.Int, bucketCount int) []float64 {
+	log.Println("Building histogram...")
+
+	buckets := make([]float64, bucketCount)
+
+	numPermutations := new(big.Int).Exp(big.NewInt(36), big.NewInt(int64(DefaultIdLength-1)), nil)
+	bucketLength := new(big.Int).Div(
+		numPermutations,
+		big.NewInt(int64(bucketCount)),
+	)
+
+	for i := range numbers {
+		bucket := new(big.Int).Div(&numbers[i], bucketLength).Int64()
+		// bucketLength is rounded down by the integer division above, so the
+		// very largest values could otherwise land one past the last bucket.
+		if bucket >= int64(bucketCount) {
+			bucket = int64(bucketCount) - 1
+		}
+		buckets[bucket]++
+	}
+
+	return buckets
+}
+
+// worker runs jobs until the jobs channel is closed, sending each finished
+// pool to results.
+func worker(id int, jobs <-chan func() *IdPool, results chan<- *IdPool) {
+	for job := range jobs {
+		log.Println("worker", id, "started job")
+		results <- job()
+	}
+}
+
+// createIdPools builds numPools pools of up to maxIdsPerPool ids each, using
+// one worker goroutine per pool.
+func createIdPools(numPools int64, maxIdsPerPool int64) []*IdPool {
+	jobsList := []func() *IdPool{}
+	for i := 0; i < int(numPools); i++ {
+		jobsList = append(jobsList, NewIdPool(int(maxIdsPerPool)))
+	}
+
+	jobs := make(chan func() *IdPool, numPools)
+	results := make(chan *IdPool, numPools)
+
+	for w := 1; w <= int(numPools); w++ {
+		go worker(w, jobs, results)
+	}
+
+	for _, job := range jobsList {
+		jobs <- job
+	}
+	close(jobs)
+
+	pools := []*IdPool{}
+	for a := 1; a <= int(numPools); a++ {
+		pool := <-results
+		pools = append(pools, pool)
+	}
+
+	return pools
+}
+
+// validateCuids fails the test if any id is rejected by IsCuid.
+func validateCuids(t *testing.T, ids []string) {
+	log.Printf("Validating all %v Cuids...", len(ids))
+
+	wg := new(sync.WaitGroup)
+	validationErrors := make(chan error)
+
+	for _, id := range ids {
+		wg.Add(1)
+		go func(id string) {
+			defer wg.Done()
+			if !IsCuid(id) {
+				validationErrors <- fmt.Errorf("Cuid (%v) is not valid", id)
+			}
+		}(id)
+	}
+
+	// Close the channel once every check has finished so the loop below ends.
+	go func() {
+		wg.Wait()
+		close(validationErrors)
+	}()
+
+	numInvalidIds := 0
+	for err := range validationErrors {
+		log.Println(err.Error())
+		numInvalidIds++
+	}
+
+	if numInvalidIds > 0 {
+		t.Fatalf("%v Cuids were invalid", numInvalidIds)
+	}
+}
diff --git a/cuid2.go b/cuid2.go
new file mode 100644
--- /dev/null
+++ b/cuid2.go
@@ -0,0 +1,227 @@
+package cuid2
+
+import (
+	"fmt"
+	"math"
+	"math/big"
+	"math/rand"
+	"os"
+	"regexp"
+	"strconv"
+	"strings"
+	"sync/atomic"
+	"time"
+
+	"golang.org/x/crypto/sha3"
+)
+
+const (
+	DefaultIdLength int = 24
+	MinIdLength     int = 2
+	MaxIdLength     int = 32
+
+	// ~22k hosts before 50% chance of initial counter collision
+	MaxSessionCount int64 = 476782367
+)
+
+// cuidPattern matches a lowercase letter followed by one or more lowercase
+// letters or digits. It is compiled once rather than on every IsCuid call.
+var cuidPattern = regexp.MustCompile("^[a-z][0-9a-z]+$")
+
+// Config holds the settings used by a generator created with Init.
+type Config struct {
+	// A custom function that can generate a floating-point value between 0 and 1
+	RandomFunc func() float64
+
+	// A counter that will be used to affect the entropy of successive id
+	// generation calls
+	SessionCounter Counter
+
+	// Length of the generated Cuid, min = 2, max = 32
+	Length int
+
+	// A unique string that will be used by the Cuid generator to help prevent
+	// collisions when generating Cuids in a distributed system.
+	Fingerprint string
+}
+
+// Counter supplies the next value of a session counter.
+type Counter interface {
+	Increment() int64
+}
+
+// SessionCounter is the default Counter; its value is updated atomically.
+type SessionCounter struct {
+	value int64
+}
+
+// NewSessionCounter returns a SessionCounter that starts at initialCount.
+func NewSessionCounter(initialCount int64) *SessionCounter {
+	return &SessionCounter{value: initialCount}
+}
+
+// Increment atomically adds one to the counter and returns the new value.
+func (sc *SessionCounter) Increment() int64 {
+	return atomic.AddInt64(&sc.value, 1)
+}
+
+// Option customises a Config; returning an error makes Init fail.
+type Option func(*Config) error
+
+// Init initializes the Cuid generator with default or user-defined config
+// options.
+//
+// It returns a function that can be called to generate Cuids using the
+// initialized config. When an option fails, Init returns that error together
+// with a generator that only produces empty strings.
+func Init(options ...Option) (func() string, error) {
+	initialSessionCount := int64(
+		math.Floor(rand.Float64() * float64(MaxSessionCount)),
+	)
+
+	config := &Config{
+		RandomFunc:     rand.Float64,
+		SessionCounter: NewSessionCounter(initialSessionCount),
+		Length:         DefaultIdLength,
+		Fingerprint:    createFingerprint(rand.Float64, getEnvironmentKeyString()),
+	}
+
+	for _, option := range options {
+		if option != nil {
+			if applyErr := option(config); applyErr != nil {
+				return func() string { return "" }, applyErr
+			}
+		}
+	}
+
+	return func() string {
+		firstLetter := getRandomAlphabet(config.RandomFunc)
+		// Named timestamp so the local does not shadow the time package.
+		timestamp := strconv.FormatInt(time.Now().UnixMilli(), 36)
+		count := strconv.FormatInt(config.SessionCounter.Increment(), 36)
+		salt := createEntropy(config.Length, config.RandomFunc)
+		hashInput := timestamp + salt + count + config.Fingerprint
+
+		return firstLetter + hash(hashInput)[1:config.Length]
+	}, nil
+}
+
+// Generate generates Cuids using default config options. Init is called
+// without options here, so no option can fail and the error is discarded.
+var Generate, _ = Init()
+
+// IsCuid checks whether a given Cuid has a valid form and length.
+func IsCuid(cuid string) bool {
+	length := len(cuid)
+	if length < MinIdLength || length > MaxIdLength {
+		return false
+	}
+
+	return cuidPattern.MatchString(cuid)
+}
+
+// WithRandomFunc sets a custom function that generates a random floating-point
+// value between 0 and 1. A nil function, or one whose probe value falls outside
+// that range, is rejected.
+func WithRandomFunc(randomFunc func() float64) Option {
+	return func(config *Config) error {
+		if randomFunc == nil {
+			return fmt.Errorf("Error: the provided random function is nil")
+		}
+
+		randomness := randomFunc()
+		if randomness < 0 || randomness > 1 {
+			return fmt.Errorf("Error: the provided random function does not generate a value between 0 and 1")
+		}
+
+		config.RandomFunc = randomFunc
+		return nil
+	}
+}
+
+// WithSessionCounter sets a custom counter that will be used to affect the
+// entropy of successive id generation calls. A nil counter is rejected.
+func WithSessionCounter(sessionCounter Counter) Option {
+	return func(config *Config) error {
+		if sessionCounter == nil {
+			return fmt.Errorf("Error: the provided session counter is nil")
+		}
+
+		config.SessionCounter = sessionCounter
+		return nil
+	}
+}
+
+// WithLength configures the length of the generated Cuid.
+//
+// Min Length = 2, Max Length = 32
+func WithLength(length int) Option {
+	return func(config *Config) error {
+		if length < MinIdLength || length > MaxIdLength {
+			return fmt.Errorf("Error: Can only generate Cuid's with a length between %v and %v", MinIdLength, MaxIdLength)
+		}
+
+		config.Length = length
+		return nil
+	}
+}
+
+// WithFingerprint sets a unique string that will be used by the id generator
+// to help prevent collisions when generating Cuids in a distributed system.
+func WithFingerprint(fingerprint string) Option {
+	return func(config *Config) error {
+		config.Fingerprint = fingerprint
+		return nil
+	}
+}
+
+// createFingerprint hashes fresh entropy together with envKeyString.
+func createFingerprint(randomFunc func() float64, envKeyString string) string {
+	sourceString := createEntropy(MaxIdLength, randomFunc) + envKeyString
+	return hash(sourceString)[1:]
+}
+
+// createEntropy returns at least length base-36 characters drawn from
+// randomFunc.
+func createEntropy(length int, randomFunc func() float64) string {
+	var entropy strings.Builder
+	for entropy.Len() < length {
+		randomness := int64(math.Floor(randomFunc() * 36))
+		entropy.WriteString(strconv.FormatInt(randomness, 36))
+	}
+
+	return entropy.String()
+}
+
+// getEnvironmentKeyString concatenates the names of all environment variables,
+// discarding their values.
+func getEnvironmentKeyString() string {
+	var keys strings.Builder
+	for _, variable := range os.Environ() {
+		// Cut never panics, unlike slicing at IndexByte's -1 for entries
+		// that contain no '='.
+		key, _, _ := strings.Cut(variable, "=")
+		keys.WriteString(key)
+	}
+
+	return keys.String()
+}
+
+// hash returns the SHA3-512 digest of input as base-36 text with the first
+// character dropped.
+func hash(input string) string {
+	digest := sha3.Sum512([]byte(input))
+	return new(big.Int).SetBytes(digest[:]).Text(36)[1:]
+}
+
+// getRandomAlphabet picks a lowercase letter using randomFunc.
+func getRandomAlphabet(randomFunc func() float64) string {
+	const alphabets = "abcdefghijklmnopqrstuvwxyz"
+
+	randomIndex := int(math.Floor(randomFunc() * 26))
+	// WithRandomFunc accepts a probe value of exactly 1 and only probes once,
+	// so clamp the index instead of reading past the end of the alphabet.
+	randomIndex = min(max(randomIndex, 0), len(alphabets)-1)
+
+	return string(alphabets[randomIndex])
+}
diff --git a/cuid2_test.go b/cuid2_test.go
new file mode 100644
--- /dev/null
+++ b/cuid2_test.go
@@ -0,0 +1,119 @@
+package cuid2
+
+import (
+	"math/rand"
+	"testing"
+)
+
+// External Tests
+func TestIsCuid(t *testing.T) {
+	testCases := map[string]bool{
+		Generate():                           true,  // Default
+		Generate() + Generate() + Generate(): false, // Too Long
+		"":                                   false, // Too Short
+		"42":                                 false, // Non-CUID
+		"aaaaDLL":                            false, // Capital letters
+		"yi7rqj1trke":                        true,  // Valid
+		"-x!ha":                              false, // Invalid characters
+		"ab*%@#x":                            false, // Invalid characters
+	}
+
+	for testCase, expected := range testCases {
+		if IsCuid(testCase) != expected {
+			t.Fatalf("Expected IsCuid(%v) to be %v, but got %v", testCase, expected, !expected)
+		}
+	}
+}
+
+func TestGeneratingInvalidCuid(t *testing.T) {
+	_, err := Init(WithLength(64))
+	if err == nil {
+		t.Fatalf(
+			"Expected to receive an error for Init(WithLength(64)), but got nothing",
+		)
+	}
+}
+
+func TestInitWithNilRandomFunc(t *testing.T) {
+	if _, err := Init(WithRandomFunc(nil)); err == nil {
+		t.Fatalf("Expected to receive an error for Init(WithRandomFunc(nil)), but got nothing")
+	}
+}
+
+func TestInitWithNilSessionCounter(t *testing.T) {
+	if _, err := Init(WithSessionCounter(nil)); err == nil {
+		t.Fatalf("Expected to receive an error for Init(WithSessionCounter(nil)), but got nothing")
+	}
+}
+
+func TestDefaultCuidLength(t *testing.T) {
+	cuid := Generate()
+	if len(cuid) != DefaultIdLength {
+		t.Fatalf("Expected default Cuid length to be %v, but got %v", DefaultIdLength, len(cuid))
+	}
+}
+
+func TestGeneratingCuidWithCustomLength(t *testing.T) {
+	customLength := 16
+	generate, err := Init(WithLength(customLength))
+	if err != nil {
+		t.Fatalf("Expected to initialize cuid2 generator but received error = %v", err.Error())
+	}
+
+	cuid := generate()
+
+	if len(cuid) != customLength {
+		t.Fatalf("Expected to generate Cuid with a custom length of %v, but got %v", customLength, len(cuid))
+	}
+}
+
+func TestGeneratingCuidWithMaxLength(t *testing.T) {
+	generate, err := Init(WithLength(MaxIdLength))
+	if err != nil {
+		t.Fatalf("Expected to initialize cuid2 generator but received error = %v", err.Error())
+	}
+
+	cuid := generate()
+
+	if len(cuid) != MaxIdLength {
+		t.Fatalf("Expected to generate Cuid with a max length of %v, but got %v", MaxIdLength, len(cuid))
+	}
+}
+
+// Internal Tests
+func TestSessionCounter(t *testing.T) {
+	var initialSessionCount int64 = 10
+	sessionCounter := NewSessionCounter(initialSessionCount)
+	expectedCounts := []int64{11, 12, 13, 14}
+	actualCounts := []int64{
+		sessionCounter.Increment(),
+		sessionCounter.Increment(),
+		sessionCounter.Increment(),
+		sessionCounter.Increment(),
+	}
+
+	for index, actualCount := range actualCounts {
+		expectedCount := expectedCounts[index]
+		if actualCount != expectedCount {
+			t.Error("Expected session counts to increment by one for each successive call")
+			t.Errorf("For an initial session count of %v, expected %v", initialSessionCount, expectedCounts)
+			t.Fatalf("Got %v", actualCounts)
+		}
+	}
+}
+
+func TestCreatingFingerprintWithEnvKeyString(t *testing.T) {
+	fingerprint := createFingerprint(rand.Float64, getEnvironmentKeyString())
+	if len(fingerprint) < MinIdLength {
+		t.Error("Could not generate fingerprint of adequate length")
+		t.Fatalf("Expected length to be at least %v, but got %v", MinIdLength, len(fingerprint))
+	}
+}
+
+func TestCreatingFingerprintWithoutEnvKeyString(t *testing.T) {
+	fingerprint := createFingerprint(rand.Float64, "")
+	if len(fingerprint) < MinIdLength {
+		t.Error("Could not generate fingerprint of adequate length")
+		t.Fatalf("Expected length to be at least %v, but got %v", MinIdLength, len(fingerprint))
+	}
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..1f942cf
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,7 @@
+module github.com/PatrickChoDev/cuid2
+
+go 1.23
+
+require golang.org/x/crypto v0.31.0
+
+require golang.org/x/sys v0.28.0 // indirect
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..0b273f9
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,4 @@
+golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
+golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
+golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
+golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=