From 2389cce0e97d5e9ffad4f468361af9c787ad26f1 Mon Sep 17 00:00:00 2001 From: Albert Pastrana Date: Wed, 23 May 2018 21:07:40 +0200 Subject: [PATCH] Adds tests for ActionConfig.create() During the process, I've moved the validation from `Config` to each specific action. Currently this only affects year and ranges, whose create can return an error. Because of this, the anonymisations function can now fail and returns an error in case there name can't be match or if the specific action can't be created. Signed-off-by: Albert Pastrana --- README.md | 5 +- anonymisations.go | 39 ++++++--- anonymisations_test.go | 191 +++++++++++++++++++++++++++++++++-------- config.go | 16 ---- config_test.go | 89 ------------------- main.go | 10 ++- main_test.go | 2 +- 7 files changed, 194 insertions(+), 158 deletions(-) diff --git a/README.md b/README.md index 04d0574..a29cb4d 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,10 @@ In order to be useful, Anon needs to be told what you want to do to each column }, { // Hash (SHA1) the input. - "name": "hash" + "name": "hash", + // Optional salt that will be appened to the input. + // If not defined, a random salt will be generated + "salt": "salt" }, { // Given a date, just keep the year. diff --git a/anonymisations.go b/anonymisations.go index 9167f3b..7ff53f9 100644 --- a/anonymisations.go +++ b/anonymisations.go @@ -37,12 +37,15 @@ type ActionConfig struct { } // Returns an array of anonymisations according to the config -func anonymisations(configs *[]ActionConfig) []Anonymisation { +func anonymisations(configs *[]ActionConfig) ([]Anonymisation, error) { + var err error res := make([]Anonymisation, len(*configs)) for i, config := range *configs { - res[i] = config.create() + if res[i], err = config.create(); err != nil { + return nil, err + } } - return res + return res, nil } // Returns the configured salt or a random one @@ -54,20 +57,20 @@ func (ac *ActionConfig) saltOrRandom() string { return strconv.Itoa(rand.Int()) } -func (ac *ActionConfig) create() Anonymisation { +func (ac *ActionConfig) create() (Anonymisation, error) { switch ac.Name { case "nothing": - return identity + return identity, nil case "outcode": - return outcode + return outcode, nil case "hash": - return hash(ac.saltOrRandom()) + return hash(ac.saltOrRandom()), nil case "year": return year(ac.DateConfig.Format) case "ranges": return ranges(ac.RangeConfig) } - return identity + return nil, fmt.Errorf("can't create an action with name %s", ac.Name) } // The no-op, returns the input unchanged. @@ -97,20 +100,32 @@ func outcode(s string) (string, error) { // If either the format is invalid or the year doesn't // match that format, it will return an error and // the input unchanged -func year(format string) Anonymisation { +func year(format string) (Anonymisation, error) { + if _, err := time.Parse(format, format); err != nil { + return nil, err + } return func(s string) (string, error) { t, err := time.Parse(format, s) if err != nil { return s, err } return strconv.Itoa(t.Year()), nil - } + }, nil } // Given a list of ranges, it will summarise numeric // values into groups of values, each group defined // by a range and an output -func ranges(ranges []RangeConfig) Anonymisation { +func ranges(ranges []RangeConfig) (Anonymisation, error) { + for _, rc := range ranges { + if rc.Gt != nil && rc.Gte != nil || rc.Lt != nil && rc.Lte != nil { + return nil, errors.New("you can only specify one of (gt, gte) and (lt, lte)") + } else if rc.Gt == nil && rc.Gte == nil && rc.Lt == nil && rc.Lte == nil { + return nil, errors.New("you need to specify at least one of gt, gte, lt, lte") + } else if rc.Output == nil { + return nil, errors.New("you need to specify the output for a range") + } + } return func(s string) (string, error) { v, err := strconv.ParseFloat(s, 64) if err != nil { @@ -122,7 +137,7 @@ func ranges(ranges []RangeConfig) Anonymisation { } } return s, errors.New("No range defined for value") - } + }, nil } func (r *RangeConfig) contains(v float64) bool { diff --git a/anonymisations_test.go b/anonymisations_test.go index 5576deb..8cacad8 100644 --- a/anonymisations_test.go +++ b/anonymisations_test.go @@ -8,48 +8,167 @@ import ( "github.com/leanovate/gopter/gen" "github.com/leanovate/gopter/prop" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) -func TestAnonymisations(t *testing.T) { - salt := "jump" - conf := &[]ActionConfig{ - ActionConfig{ - Name: "nothing", - }, - ActionConfig{ - Name: "hash", - Salt: &salt, - }, - } - // can't test that the functions are equal because of https://github.com/stretchr/testify/issues/182 - // and https://github.com/stretchr/testify/issues/159#issuecomment-99557398 - // will have to test that the functions return the same - anons := anonymisations(conf) - expectedRes, expectedErr := identity("a") - actualRes, actualErr := anons[0]("a") - assert.Equal(t, expectedRes, actualRes) - assert.Equal(t, expectedErr, actualErr) - expectedRes, expectedErr = hash("jump")("a") - actualRes, actualErr = anons[1]("a") +var salt = "jump" + +const seed = int64(1) + +//this is the first random salt with the seed above +const firstSalt = "5577006791947779410" + +// can't test that the functions are equal because of https://github.com/stretchr/testify/issues/182 +// and https://github.com/stretchr/testify/issues/159#issuecomment-99557398 +// will have to test that the functions return the same +func assertAnonymisationFunction(t *testing.T, expected Anonymisation, actual Anonymisation, value string) { + require.NotNil(t, expected) + require.NotNil(t, actual) + expectedRes, expectedErr := expected(value) + actualRes, actualErr := actual(value) assert.Equal(t, expectedRes, actualRes) assert.Equal(t, expectedErr, actualErr) } -func TestActionConfig(t *testing.T) { - t.Run("saltOrRandom", func(t *testing.T) { - t.Run("if salt is not specified", func(t *testing.T) { +func TestAnonymisations(t *testing.T) { + t.Run("a valid configuration", func(t *testing.T) { + conf := &[]ActionConfig{ + ActionConfig{ + Name: "nothing", + }, + ActionConfig{ + Name: "hash", + Salt: &salt, + }, + } + anons, err := anonymisations(conf) + assert.NoError(t, err) + assertAnonymisationFunction(t, identity, anons[0], "a") + assertAnonymisationFunction(t, hash(salt), anons[1], "a") + }) + t.Run("an invalid configuration", func(t *testing.T) { + conf := &[]ActionConfig{ActionConfig{Name: "year", DateConfig: DateConfig{Format: "3333"}}} + anons, err := anonymisations(conf) + assert.Error(t, err, "should return an error") + assert.Nil(t, anons) + }) +} + +func TestActionConfigSaltOrRandom(t *testing.T) { + t.Run("if salt is not specified", func(t *testing.T) { + rand.Seed(seed) + acNoSalt := ActionConfig{Name: "hash"} + assert.Equal(t, firstSalt, acNoSalt.saltOrRandom(), "should return a random salt") + }) + t.Run("if salt is specified", func(t *testing.T) { + emptySalt := "" + acEmptySalt := ActionConfig{Name: "hash", Salt: &emptySalt} + assert.Empty(t, acEmptySalt.saltOrRandom(), "should return the empty salt if empty") + + acSalt := ActionConfig{Name: "hash", Salt: &salt} + assert.Equal(t, "jump", acSalt.saltOrRandom(), "should return the salt") + }) +} + +func TestActionConfigCreate(t *testing.T) { + t.Run("invalid name", func(t *testing.T) { + ac := ActionConfig{Name: "invalid name"} + res, err := ac.create() + assert.Error(t, err) + assert.Nil(t, res) + }) + t.Run("identity", func(t *testing.T) { + ac := ActionConfig{Name: "nothing"} + res, err := ac.create() + assert.NoError(t, err) + assertAnonymisationFunction(t, identity, res, "a") + }) + t.Run("outcode", func(t *testing.T) { + ac := ActionConfig{Name: "outcode"} + res, err := ac.create() + assert.NoError(t, err) + assertAnonymisationFunction(t, outcode, res, "a") + }) + t.Run("hash", func(t *testing.T) { + t.Run("if salt is not specified uses a random salt", func(t *testing.T) { rand.Seed(1) - acNoSalt := ActionConfig{Name: "hash"} - assert.Equal(t, "5577006791947779410", acNoSalt.saltOrRandom(), "should return a random salt") + ac := ActionConfig{Name: "hash"} + res, err := ac.create() + assert.NoError(t, err) + assertAnonymisationFunction(t, hash(firstSalt), res, "a") + }) + t.Run("if salt is specified uses it", func(t *testing.T) { + ac := ActionConfig{Name: "hash", Salt: &salt} + res, err := ac.create() + assert.NoError(t, err) + assertAnonymisationFunction(t, hash(salt), res, "a") + }) + }) + t.Run("year", func(t *testing.T) { + t.Run("with an invalid format", func(t *testing.T) { + ac := ActionConfig{Name: "year", DateConfig: DateConfig{Format: "11112233"}} + res, err := ac.create() + assert.Error(t, err, "should fail") + assert.Nil(t, res) + }) + t.Run("with a valid format", func(t *testing.T) { + ac := ActionConfig{Name: "year", DateConfig: DateConfig{Format: "20060102"}} + res, err := ac.create() + assert.NoError(t, err, "should not fail") + y, err := year("20060102") + assert.NoError(t, err) + assertAnonymisationFunction(t, y, res, "21121212") + }) + }) + t.Run("ranges", func(t *testing.T) { + num := 2.0 + output := "0-100" + t.Run("range has at least one of lt, lte, gt, gte", func(t *testing.T) { + ac := ActionConfig{ + Name: "ranges", + RangeConfig: []RangeConfig{RangeConfig{Output: &output}}, + } + r, err := ac.create() + assert.Error(t, err, "if not should return an error") + assert.Nil(t, r) + }) + t.Run("range contains both lt and lte", func(t *testing.T) { + ac := ActionConfig{ + Name: "ranges", + RangeConfig: []RangeConfig{RangeConfig{Lt: &num, Lte: &num, Output: &output}}, + } + r, err := ac.create() + assert.Error(t, err, "if not should return an error") + assert.Nil(t, r) + }) + t.Run("range contains both gt and gte", func(t *testing.T) { + ac := ActionConfig{ + Name: "ranges", + RangeConfig: []RangeConfig{RangeConfig{Gt: &num, Gte: &num, Output: &output}}, + } + r, err := ac.create() + assert.Error(t, err, "if not should return an error") + assert.Nil(t, r) + }) + t.Run("range without output defined", func(t *testing.T) { + ac := ActionConfig{ + Name: "ranges", + RangeConfig: []RangeConfig{RangeConfig{Lt: &num, Gte: &num}}, + } + r, err := ac.create() + assert.Error(t, err, "if not should return an error") + assert.Nil(t, r) }) - t.Run("if salt is specified", func(t *testing.T) { - emptySalt := "" - acEmptySalt := ActionConfig{Name: "hash", Salt: &emptySalt} - assert.Empty(t, acEmptySalt.saltOrRandom(), "should return the empty salt if empty") - - salt := "jump" - acSalt := ActionConfig{Name: "hash", Salt: &salt} - assert.Equal(t, "jump", acSalt.saltOrRandom(), "should return the salt") + t.Run("valid range", func(t *testing.T) { + rangeConfigs := []RangeConfig{RangeConfig{Lte: &num, Gte: &num, Output: &output}} + ac := ActionConfig{ + Name: "ranges", + RangeConfig: rangeConfigs, + } + r, err := ac.create() + expected, _ := ranges(rangeConfigs) + assert.NoError(t, err) + assertAnonymisationFunction(t, expected, r, "2") }) }) } @@ -108,7 +227,7 @@ func TestOutcode(t *testing.T) { } func TestYear(t *testing.T) { - f := year("20060102") + f, _ := year("20060102") t.Run("if the date can be parsed", func(t *testing.T) { res, err := f("20120102") assert.NoError(t, err, "should return no error") @@ -124,7 +243,7 @@ func TestRanges(t *testing.T) { min := 0.0 max := 100.0 output := "0-100" - f := ranges([]RangeConfig{RangeConfig{Gt: &min, Lte: &max, Output: &output}}) + f, _ := ranges([]RangeConfig{RangeConfig{Gt: &min, Lte: &max, Output: &output}}) t.Run("if the value is not a float", func(t *testing.T) { res, err := f("input") assert.Error(t, err, "should return an error") diff --git a/config.go b/config.go index fd4b26a..126241c 100644 --- a/config.go +++ b/config.go @@ -2,7 +2,6 @@ package main import ( "encoding/json" - "errors" "os" ) @@ -53,18 +52,3 @@ func loadConfig(filename string) (*Config, error) { } return &conf, err } - -func (conf *Config) validate() error { - for _, action := range conf.Actions { - for _, rc := range action.RangeConfig { - if rc.Gt != nil && rc.Gte != nil || rc.Lt != nil && rc.Lte != nil { - return errors.New("You can only specify one of (gt, gte) and (lt, lte)") - } else if rc.Gt == nil && rc.Gte == nil && rc.Lt == nil && rc.Lte == nil { - return errors.New("You need to specify at least one of gt, gte, lt, lte") - } else if rc.Output == nil { - return errors.New("You need to specify the output for a range") - } - } - } - return nil -} diff --git a/config_test.go b/config_test.go index ffe389a..e90e5b4 100644 --- a/config_test.go +++ b/config_test.go @@ -76,92 +76,3 @@ func TestLoadConfig(t *testing.T) { }, *conf, "should return the config properly decoded") }) } - -func TestValidateConfig(t *testing.T) { - t.Run("range config validation", func(t *testing.T) { - num := 2.0 - output := "0-100" - t.Run("range has at least one of lt, lte, gt, gte", func(t *testing.T) { - conf := Config{ - Actions: []ActionConfig{ - ActionConfig{ - Name: "range", - RangeConfig: []RangeConfig{ - RangeConfig{ - Output: &output, - }, - }, - }, - }, - } - assert.Error(t, conf.validate(), "should return an error") - }) - t.Run("range contains both lt and lte", func(t *testing.T) { - conf := Config{ - Actions: []ActionConfig{ - ActionConfig{ - Name: "range", - RangeConfig: []RangeConfig{ - RangeConfig{ - Lt: &num, - Lte: &num, - Output: &output, - }, - }, - }, - }, - } - assert.Error(t, conf.validate(), "should return an error") - }) - t.Run("range contains both gt and gte", func(t *testing.T) { - conf := Config{ - Actions: []ActionConfig{ - ActionConfig{ - Name: "range", - RangeConfig: []RangeConfig{ - RangeConfig{ - Gt: &num, - Gte: &num, - Output: &output, - }, - }, - }, - }, - } - assert.Error(t, conf.validate(), "should return an error") - }) - t.Run("range without output defined", func(t *testing.T) { - conf := Config{ - Actions: []ActionConfig{ - ActionConfig{ - Name: "range", - RangeConfig: []RangeConfig{ - RangeConfig{ - Lt: &num, - Gte: &num, - }, - }, - }, - }, - } - assert.Error(t, conf.validate(), "should return an error") - }) - t.Run("config contains a correct range", func(t *testing.T) { - conf := Config{ - Actions: []ActionConfig{ - ActionConfig{ - Name: "range", - RangeConfig: []RangeConfig{ - RangeConfig{ - Lt: &num, - Gte: &num, - Output: &output, - }, - }, - }, - }, - } - assert.NoError(t, conf.validate(), "should not return an error") - }) - }) -} diff --git a/main.go b/main.go index e3477bf..ac4c7b2 100644 --- a/main.go +++ b/main.go @@ -6,10 +6,13 @@ import ( "hash/fnv" "io" "log" + "math/rand" "os" + "time" ) func main() { + rand.Seed(time.Now().UTC().UnixNano()) //TODO move args parsing to a function configFile := flag.String("config", "config.json", "Configuration of the data to be anonymised. Default is 'config.json'") outputFile := flag.String("output", "", "Output file. Default is stdout.") @@ -18,12 +21,13 @@ func main() { conf, err := loadConfig(*configFile) if err != nil { log.Fatal(err) - } else if err = conf.validate(); err != nil { - log.Fatal(err) } r := initReader(flag.Arg(0), conf.Csv) w := initWriter(*outputFile, conf.Csv) - anons := anonymisations(&conf.Actions) + anons, err := anonymisations(&conf.Actions) + if err != nil { + log.Fatal(err) + } i := 0 for { diff --git a/main_test.go b/main_test.go index 3e551cd..4548823 100644 --- a/main_test.go +++ b/main_test.go @@ -86,7 +86,7 @@ func stdOutOk(s string) (*os.File, error) { func TestAnonymise(t *testing.T) { record := []string{"a", "b", "c"} - actions := []Anonymisation{identity, hash, identity} + actions := []Anonymisation{identity, hash(""), identity} output := []string{"a", "e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98", "c"} res, err := anonymise(record, actions) assert.NoError(t, err)