Skip to content

Commit

Permalink
wordset: add new type for interning word sets
Browse files Browse the repository at this point in the history
  • Loading branch information
jbowens committed Nov 16, 2020
1 parent 2cd3b77 commit 964cc4e
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 0 deletions.
64 changes: 64 additions & 0 deletions wordset.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package codenames

import (
"crypto/sha1"
"errors"
"fmt"
"io"
"sort"
"strings"
"sync"
)

// wordSetID identifies a word set. It is a fixed-size byte array with the
// length of a SHA-1 digest, which makes it comparable and usable as a map key.
type wordSetID [sha1.Size]byte

// String renders the ID as lowercase hexadecimal, two characters per byte.
func (i wordSetID) String() string {
	// Format a byte-slice view rather than i itself: passing i to %x would
	// invoke this String method again.
	digest := i[:]
	return fmt.Sprintf("%x", digest)
}

// WordSets interns canonicalized word sets, keyed by a hash of their
// contents, so that equal sets share a single slice. The zero value is ready
// to use. Because it contains a sync.Mutex, a WordSets must not be copied
// after first use.
type WordSets struct {
	// mu guards byID.
	mu sync.Mutex
	// byID maps a word set ID to the interned, canonical word list.
	byID map[wordSetID][]string
}

// init lazily allocates the byID map. Callers must hold ws.mu.
func (ws *WordSets) init() {
	if ws.byID == nil {
		ws.byID = make(map[wordSetID][]string)
	}
}

// Canonicalize normalizes words into a canonical word set and interns it.
//
// Each word is upper-cased and stripped of surrounding whitespace; blank
// entries and duplicates are dropped, and the remainder is sorted. The ID is
// the SHA-1 hash of the canonical words, each followed by a NUL byte.
//
// It returns the ID and the interned slice for the set. Repeated calls with
// equivalent input return the same slice, which is shared between all
// callers and must be treated as read-only. The input slice is never
// modified or retained.
//
// An input with no usable words yields the empty set without error. A
// non-empty set with fewer than 25 distinct words is rejected with an error.
func (ws *WordSets) Canonicalize(words []string) (wordSetID, []string, error) {
	const minWords = 25

	// Normalize and deduplicate. This only touches local state, so it is
	// done before taking the lock.
	set := make(map[string]struct{}, len(words))
	for _, w := range words {
		w = strings.TrimSpace(strings.ToUpper(w))
		if w == "" {
			// A blank entry is not a word; don't let it count or be stored.
			continue
		}
		set[w] = struct{}{}
	}
	if len(set) > 0 && len(set) < minWords {
		return wordSetID{}, nil, errors.New("need at least 25 words")
	}

	// Build the canonical list in freshly allocated storage. Reusing the
	// caller's backing array would clobber the caller's data and leave the
	// interned set aliased to memory the caller may still write to.
	canonical := make([]string, 0, len(set))
	for w := range set {
		canonical = append(canonical, w)
	}
	sort.Strings(canonical)

	// Calculate the word set ID, a hash of the canonicalized word set. The
	// NUL terminator keeps word boundaries unambiguous ("AB","C" vs "A","BC").
	// hash.Hash writes never return an error, so the results are not checked.
	h := sha1.New()
	for _, w := range canonical {
		io.WriteString(h, w)
		h.Write([]byte{0x00})
	}
	var id wordSetID
	copy(id[:], h.Sum(nil))

	ws.mu.Lock()
	defer ws.mu.Unlock()
	ws.init()

	if interned, ok := ws.byID[id]; ok {
		return id, interned, nil
	}
	ws.byID[id] = canonical
	return id, canonical, nil
}
43 changes: 43 additions & 0 deletions wordset_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package codenames

import (
"bytes"
"encoding/json"
"io/ioutil"
"testing"
)

// TestWordSetCanonicalize loads the word sets in frontend/words.json,
// canonicalizes each one, then canonicalizes a copy of each a second time and
// checks that the second call returns the very same interned slice (compared
// by the address of its first element).
func TestWordSetCanonicalize(t *testing.T) {
	raw, err := ioutil.ReadFile("frontend/words.json")
	if err != nil {
		t.Fatal(err)
	}

	var defaultWordsets map[string][]string
	if err := json.NewDecoder(bytes.NewReader(raw)).Decode(&defaultWordsets); err != nil {
		t.Fatal(err)
	}

	var ws WordSets

	// First pass: intern every set and remember the slice that came back.
	firstPass := make(map[string][]string)
	for name, words := range defaultWordsets {
		id, interned, err := ws.Canonicalize(words)
		if err != nil {
			t.Fatal(err)
		}
		t.Logf("%s : %s\n", name, id)
		firstPass[name] = interned
	}

	// Second pass: hand Canonicalize a separate copy of each set and expect
	// the slice from the first pass to be returned again.
	for name, words := range defaultWordsets {
		dup := make([]string, len(words))
		copy(dup, words)

		_, interned, err := ws.Canonicalize(dup)
		if err != nil {
			t.Fatal(err)
		}

		previous := firstPass[name]
		if &previous[0] != &interned[0] {
			t.Errorf("word set %q has different slice pointer 2nd canonicalization", name)
		}
	}
}

0 comments on commit 964cc4e

Please sign in to comment.