From f2509340ff3bbfc5662f1cdc77c2de8792da5977 Mon Sep 17 00:00:00 2001 From: Sam Wheating Date: Thu, 7 Sep 2023 09:03:38 -0700 Subject: [PATCH 1/8] Add difference, subset, superset, intersection, symmetric difference to sets --- doc/spec.md | 7 +++ starlark/eval.go | 35 ++++-------- starlark/library.go | 82 +++++++++++++++++++++++++++ starlark/testdata/builtins.star | 2 +- starlark/testdata/set.star | 45 ++++++++++++--- starlark/value.go | 98 +++++++++++++++++++++++++++++++++ 6 files changed, 238 insertions(+), 31 deletions(-) diff --git a/doc/spec.md b/doc/spec.md index c98ee173..f5ad5fa4 100644 --- a/doc/spec.md +++ b/doc/spec.md @@ -2045,6 +2045,12 @@ Sets int & int # bitwise intersection (AND) set & set # set intersection set ^ set # set symmetric difference + set - set # set difference + set >= set # superset + set > set # proper superset + set <= set # subset + set < set # proper subset + Dict dict | dict # ordered union @@ -2115,6 +2121,7 @@ Implementations may impose a limit on the second operand of a left shift. set([1, 2]) & set([2, 3]) # set([2]) set([1, 2]) | set([2, 3]) # set([1, 2, 3]) set([1, 2]) ^ set([2, 3]) # set([1, 3]) +set([1, 2]) - set([2, 3]) # set([1]) ``` Implementation note: diff --git a/starlark/eval.go b/starlark/eval.go index 706c6249..fe005691 100644 --- a/starlark/eval.go +++ b/starlark/eval.go @@ -826,6 +826,12 @@ func Binary(op syntax.Token, x, y Value) (Value, error) { } return x - yf, nil } + case *Set: // difference + if y, ok := y.(*Set); ok { + iter := y.Iterate() + defer iter.Done() + return x.Difference(iter) + } } case syntax.STAR: @@ -1097,17 +1103,9 @@ func Binary(op syntax.Token, x, y Value) (Value, error) { } case *Set: // intersection if y, ok := y.(*Set); ok { - set := new(Set) - if x.Len() > y.Len() { - x, y = y, x // opt: range over smaller set - } - for xe := x.ht.head; xe != nil; xe = xe.next { - // Has, Insert cannot fail here. - if found, _ := y.Has(xe.key); found { - set.Insert(xe.key) - } - } - return set, nil + iter := y.Iterate() + defer iter.Done() + return x.Intersection(iter) } } @@ -1119,18 +1117,9 @@ func Binary(op syntax.Token, x, y Value) (Value, error) { } case *Set: // symmetric difference if y, ok := y.(*Set); ok { - set := new(Set) - for xe := x.ht.head; xe != nil; xe = xe.next { - if found, _ := y.Has(xe.key); !found { - set.Insert(xe.key) - } - } - for ye := y.ht.head; ye != nil; ye = ye.next { - if found, _ := x.Has(ye.key); !found { - set.Insert(ye.key) - } - } - return set, nil + iter := y.Iterate() + defer iter.Done() + return x.symmetricDifference(iter) } } diff --git a/starlark/library.go b/starlark/library.go index 0e3c7bf7..1da3910d 100644 --- a/starlark/library.go +++ b/starlark/library.go @@ -142,9 +142,14 @@ var ( setMethods = map[string]*Builtin{ "add": NewBuiltin("add", set_add), "clear": NewBuiltin("clear", set_clear), + "difference": NewBuiltin("difference", set_difference), "discard": NewBuiltin("discard", set_discard), + "intersection": NewBuiltin("intersection", set_intersection), + "issubset": NewBuiltin("issubset", set_issubset), + "issuperset": NewBuiltin("issuperset", set_issuperset), "pop": NewBuiltin("pop", set_pop), "remove": NewBuiltin("remove", set_remove), + "symmetric_difference": NewBuiltin("symmetric_difference", set_symmetric_difference), "union": NewBuiltin("union", set_union), } ) @@ -2204,6 +2209,68 @@ func set_clear(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) return None, nil } +// https://github.com/google/starlark-go/blob/master/doc/spec.md#set·difference. +func set_difference(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + // TODO: support multiple others: s.difference(*others) + var other Iterable + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0, &other); err != nil { + return nil, err + } + iter := other.Iterate() + defer iter.Done() + diff, err := b.Receiver().(*Set).Difference(iter) + if err != nil { + return nil, nameErr(b, err) + } + return diff, nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#set_intersection. +func set_intersection(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + // TODO: support multiple others: s.difference(*others) + var other Iterable + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0, &other); err != nil { + return nil, err + } + iter := other.Iterate() + defer iter.Done() + diff, err := b.Receiver().(*Set).Intersection(iter) + if err != nil { + return nil, nameErr(b, err) + } + return diff, nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#set_intersection. +func set_issubset(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var other Iterable + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0, &other); err != nil { + return nil, err + } + iter := other.Iterate() + defer iter.Done() + diff, err := b.Receiver().(*Set).isSubset(iter) + if err != nil { + return nil, nameErr(b, err) + } + return Bool(diff), nil +} + +// https://github.com/google/starlark-go/blob/master/doc/spec.md#set_intersection. +func set_issuperset(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var other Iterable + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0, &other); err != nil { + return nil, err + } + iter := other.Iterate() + defer iter.Done() + diff, err := b.Receiver().(*Set).isSuperset(iter) + if err != nil { + return nil, nameErr(b, err) + } + return Bool(diff), nil +} + // https://github.com/google/starlark-go/blob/master/doc/spec.md#set·discard. func set_discard(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { var k Value @@ -2252,6 +2319,21 @@ func set_remove(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error return nil, nameErr(b, "missing key") } +// https://github.com/google/starlark-go/blob/master/doc/spec.md#set·symmetric_difference. +func set_symmetric_difference(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var other Iterable + if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0, &other); err != nil { + return nil, err + } + iter := other.Iterate() + defer iter.Done() + diff, err := b.Receiver().(*Set).symmetricDifference(iter) + if err != nil { + return nil, nameErr(b, err) + } + return diff, nil +} + // https://github.com/google/starlark-go/blob/master/doc/spec.md#set·union. func set_union(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { var iterable Iterable diff --git a/starlark/testdata/builtins.star b/starlark/testdata/builtins.star index c350c967..c7188fa3 100644 --- a/starlark/testdata/builtins.star +++ b/starlark/testdata/builtins.star @@ -196,7 +196,7 @@ assert.eq(getattr(hf, "x"), 2) assert.eq(hf.x, 2) # built-in types can have attributes (methods) too. myset = set([]) -assert.eq(dir(myset), ["add", "clear", "discard", "pop", "remove", "union"]) +assert.eq(dir(myset), ["add", "clear", "difference", "discard", "intersection", "issubset", "issuperset", "pop", "remove", "symmetric_difference", "union"]) assert.true(hasattr(myset, "union")) assert.true(not hasattr(myset, "onion")) assert.eq(str(getattr(myset, "union")), "") diff --git a/starlark/testdata/set.star b/starlark/testdata/set.star index 3dcde3c1..b83918e5 100644 --- a/starlark/testdata/set.star +++ b/starlark/testdata/set.star @@ -1,5 +1,5 @@ # Tests of Starlark 'set' -# option:set +# option:set option:globalreassign # Sets are not a standard part of Starlark, so the features # tested in this file must be enabled in the application by setting @@ -9,9 +9,7 @@ # TODO(adonovan): support set mutation: # - del set[k] -# - set.remove # - set.update -# - set.clear # - set += iterable, perhaps? # Test iterator invalidation. @@ -67,12 +65,16 @@ assert.eq(list(x.union([5, 1])), [1, 2, 3, 5]) assert.eq(list(x.union((6, 5, 4))), [1, 2, 3, 6, 5, 4]) assert.fails(lambda : x.union([1, 2, {}]), "unhashable type: dict") -# intersection, set & set (use resolve.AllowBitwise to enable it) +# intersection, set & set or set.intersection(iterable) assert.eq(list(set("a".elems()) & set("b".elems())), []) assert.eq(list(set("ab".elems()) & set("bc".elems())), ["b"]) +assert.eq(list(set("a".elems()).intersection("b".elems())), []) +assert.eq(list(set("ab".elems()).intersection("bc".elems())), ["b"]) -# symmetric difference, set ^ set (use resolve.AllowBitwise to enable it) +# symmetric difference, set ^ set or set.symmetric_difference(iterable) assert.eq(set([1, 2, 3]) ^ set([4, 5, 3]), set([1, 2, 4, 5])) +assert.eq(set([1,2,3,4]).symmetric_difference([3,4,5,6]), set([1,2,5,6])) +assert.eq(set([1,2,3,4]).symmetric_difference(set([])), set([1,2,3,4])) def test_set_augmented_assign(): x = set([1, 2, 3]) @@ -100,7 +102,6 @@ assert.eq(x, x) assert.eq(y, y) assert.true(x != y) assert.eq(set([1, 2, 3]), set([3, 2, 1])) -assert.fails(lambda : x < y, "set < set not implemented") # iteration assert.true(type([elem for elem in x]), "list") @@ -154,7 +155,6 @@ pop_set.add(2) freeze(pop_set) assert.fails(lambda: pop_set.pop(), "pop: cannot delete from frozen hash table") - # clear clear_set = set([1,2,3]) clear_set.clear() @@ -165,3 +165,34 @@ assert.eq(clear_set.clear(), None) other_clear_set = set([1,2,3]) freeze(other_clear_set) assert.fails(lambda: other_clear_set.clear(), "clear: cannot clear frozen hash table") + +# difference: set - set or set.difference(iterable) +assert.eq(set([1,2,3,4]).difference([1,2,3,4]), set([])) +assert.eq(set([1,2,3,4]).difference([1,2]), set([3,4])) +assert.eq(set([1,2,3,4]).difference([]), set([1,2,3,4])) +assert.eq(set([1,2,3,4]).difference(set([1,2,3])), set([4])) + +assert.eq(set([1,2,3,4]) - set([1,2,3,4]), set()) +assert.eq(set([1,2,3,4]) - set([1,2]), set([3,4])) + +# issuperset: set >= set or set.issuperset(iterable) +assert.true(set([1,2,3]).issuperset([1,2])) +assert.true(not set([1,2,3]).issuperset(set([1,2,4]))) +assert.true(set([1,2,3]) >= set([1,2,3])) +assert.true(set([1,2,3]) >= set([1,2])) +assert.true(not set([1,2,3]) >= set([1,2,4])) + +# proper superset: set > set +assert.true(set([1, 2, 3]) > set([1, 2])) +assert.true(not set([1,2, 3]) > set([1, 2, 3])) + +# issubset: set <= set or set.issubset(iterable) +assert.true(set([1,2]).issubset([1,2,3])) +assert.true(not set([1,2,3]).issubset(set([1,2,4]))) +assert.true(set([1,2,3]) <= set([1,2,3])) +assert.true(set([1,2]) <= set([1,2,3])) +assert.true(not set([1,2,3]) <= set([1,2,4])) + +# proper subset: set < set +assert.true(set([1,2]) < set([1,2,3])) +assert.true(not set([1,2,3]) < set([1,2,3])) diff --git a/starlark/value.go b/starlark/value.go index 3ceacc60..e290a913 100644 --- a/starlark/value.go +++ b/starlark/value.go @@ -1134,6 +1134,26 @@ func (x *Set) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error case syntax.NEQ: ok, err := setsEqual(x, y, depth) return !ok, err + case syntax.GE: // superset + iter := y.Iterate() + defer iter.Done() + return x.isSuperset(iter) + case syntax.LE: // subset + iter := y.Iterate() + defer iter.Done() + return x.isSubset(iter) + case syntax.GT: // proper superset + eq, err := setsEqual(x, y, depth) + iter := y.Iterate() + defer iter.Done() + ok, err := x.isSuperset(iter) + return ok && !eq, err + case syntax.LT: // proper subset + eq, _ := setsEqual(x, y, depth) + iter := y.Iterate() + defer iter.Done() + ok, err := x.isSubset(iter) + return ok && !eq, err default: return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y.Type()) } @@ -1165,6 +1185,84 @@ func (s *Set) Union(iter Iterator) (Value, error) { return set, nil } +func (s *Set) Difference(other Iterator) (Value, error) { + diff := new(Set) + for e := s.ht.head; e != nil; e = e.next { + diff.Insert(e.key) // can't fail + } + var x Value + for other.Next(&x) { + if _, err := diff.Delete(x); err != nil { + return nil, err + } + } + return diff, nil +} + +func (s *Set) isSuperset(other Iterator) (bool, error) { + var x Value + for other.Next(&x) { + found, err := s.Has(x) + if err != nil { + return false, err + } + if !found { + return false, nil + } + } + return true, nil +} + +func (s *Set) isSubset(other Iterator) (bool, error) { + var x Value + otherset := new(Set) + for other.Next(&x) { + err := otherset.Insert(x) + if err != nil { + return false, err + } + } + iter := s.Iterate() + defer iter.Done() + return otherset.isSuperset(iter) +} + +func (s *Set) Intersection(other Iterator) (Value, error) { + intersect := new(Set) + var x Value + for other.Next(&x) { + found, err := s.Has(x) + if err != nil { + return nil, err + } + if found { + err = intersect.Insert(x) + if err != nil { + return nil, err + } + } + } + return intersect, nil +} + +func (s *Set) symmetricDifference(other Iterator) (Value, error) { + diff := new(Set) + for e := s.ht.head; e != nil; e = e.next { + diff.Insert(e.key) // can't fail + } + var x Value + for other.Next(&x) { + found, err := diff.Delete(x) + if err != nil { + return nil, err + } + if !found { + diff.Insert(x) + } + } + return diff, nil +} + // toString returns the string form of value v. // It may be more efficient than v.String() for larger values. func toString(v Value) string { From 1dccdbb4610bff4e074e8386fb1383b08a98700f Mon Sep 17 00:00:00 2001 From: Sam Wheating Date: Thu, 7 Sep 2023 09:22:39 -0700 Subject: [PATCH 2/8] Added documentation on new set operations --- doc/spec.md | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/doc/spec.md b/doc/spec.md index f5ad5fa4..221bb1b3 100644 --- a/doc/spec.md +++ b/doc/spec.md @@ -155,9 +155,14 @@ reproducibility is paramount, such as build tools. * [list·remove](#list·remove) * [set·add](#set·add) * [set·clear](#set·clear) + * [set·difference](#set·difference) * [set·discard](#set·discard) + * [set·intersection](#set·intersection) + * [set·issubset](#set·issubset) + * [set·issuperset](#set·issuperset) * [set·pop](#set·pop) * [set·remove](#set·remove) + * [set·symmetric_difference](#set·symmetric_difference) * [set·union](#set·union) * [string·capitalize](#string·capitalize) * [string·codepoint_ords](#string·codepoint_ords) @@ -973,11 +978,16 @@ which must be an iterable sequence. Sets have no literal syntax. A set has these methods: -* [`add`](#set·add) +* [`add``](#set·add) * [`clear`](#set·clear) +* [`difference`](#set·difference) * [`discard`](#set·discard) +* [`intersection`](#set·intersection) +* [`issubset`](#set·issubset) +* [`issuperset`](#set·issuperset) * [`pop`](#set·pop) * [`remove`](#set·remove) +* [`symmetric_difference`](#set·symmetric_difference) * [`union`](#set·union) @@ -3789,6 +3799,21 @@ x.clear(2) # None x # set([]) ``` + +### set·difference + +`S.difference(y)` returns a new set which includes all items in S which are not in y. + +y can be any type of iterable (set, list, tuple). + +The difference between two sets can also be expressed using the `-` operator. + +```python +x = set([1, 2, 3]) +x.difference([3, 4, 5]) # set([1,2]) +x - set([1, 2]) # set([3]) +``` + ### set·discard @@ -3805,6 +3830,59 @@ x.discard(2) # None x # set([1, 3]) ``` + +### set·intersection + +`S.intersection(y)` returns a new set which includes all items in S which are also in y. + +y can be any type of iterable (set, list, tuple). + +The difference between two sets can also be expressed using the `&` operator. + +```python +x = set([1, 2, 3]) +x.intersection([3, 4, 5]) # set([3]) +x & set([1, 2]) # set([1,2]) +``` + + +### set·issubset + +`S.issubset(y)` returns True if all items in S are also in y, otherwise it returns False. + +y can be any type of iterable (set, list, tuple). + +The `<=` operator can be used to determine if a given set is a subset of another. + +The `<` operator can be used to determine if a given set is a proper subset of another, which is equivalent to `x <= y and x != y` + +```python +x = set([1, 2]) +x.issubset([1, 2, 3]) # True +x.issubset([1, 3, 4]) # False +x <= set([1, 2]) # True +x < set([1, 2]) # False +``` + + +### set·issuperset + +`S.issuperset(y)` returns True if all items in y are also in S, otherwise it returns False. + +y can be any type of iterable (set, list, tuple). + +The `>=` operator can be used to determine if a given set is a superset of another. + +The `>` operator can be used to determine if a given set is a proper superset of another, which is equivalent to `x >= y and x != y` + +```python +x = set([1, 2, 3]) +x.issuperset([1, 2]) # True +x.issuperset([1, 3, 4]) # False +x >= set([1, 2, 3]) # True +x > set([1, 2]) # False +``` + ### set·pop From a0b55565cf8ca2ea77f1c7d7adae22594d66f410 Mon Sep 17 00:00:00 2001 From: Sam Wheating Date: Thu, 7 Sep 2023 09:25:05 -0700 Subject: [PATCH 3/8] Remove outdated comments regarding allowBitwise flag --- starlark/testdata/int.star | 2 -- starlark/testdata/set.star | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/starlark/testdata/int.star b/starlark/testdata/int.star index 46c0ad0d..f0e2cde3 100644 --- a/starlark/testdata/int.star +++ b/starlark/testdata/int.star @@ -74,7 +74,6 @@ def compound(): x %= 3 assert.eq(x, 2) - # use resolve.AllowBitwise to enable the ops: x = 2 x &= 1 assert.eq(x, 0) @@ -197,7 +196,6 @@ assert.fails(lambda: int("0x-4", 16), "invalid literal with base 16: 0x-4") # bitwise union (int|int), intersection (int&int), XOR (int^int), unary not (~int), # left shift (int<>int). -# use resolve.AllowBitwise to enable the ops. # TODO(adonovan): this is not yet in the Starlark spec, # but there is consensus that it should be. assert.eq(1 | 2, 3) diff --git a/starlark/testdata/set.star b/starlark/testdata/set.star index b83918e5..303b4472 100644 --- a/starlark/testdata/set.star +++ b/starlark/testdata/set.star @@ -46,7 +46,7 @@ y = set([3, 4, 5]) # set + any is not defined assert.fails(lambda : x + y, "unknown.*: set \\+ set") -# set | set (use resolve.AllowBitwise to enable it) +# set | set assert.eq(list(set("a".elems()) | set("b".elems())), ["a", "b"]) assert.eq(list(set("ab".elems()) | set("bc".elems())), ["a", "b", "c"]) assert.fails(lambda : set() | [], "unknown binary op: set | list") From 0d38f007cae7ffacf66fd21f6dbc2c965fb008a3 Mon Sep 17 00:00:00 2001 From: Sam Wheating Date: Thu, 7 Sep 2023 09:35:19 -0700 Subject: [PATCH 4/8] fixes --- doc/spec.md | 17 ++++++++++++++++- starlark/library.go | 4 ++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/doc/spec.md b/doc/spec.md index 221bb1b3..92f7c09d 100644 --- a/doc/spec.md +++ b/doc/spec.md @@ -978,7 +978,7 @@ which must be an iterable sequence. Sets have no literal syntax. A set has these methods: -* [`add``](#set·add) +* [`add`](#set·add) * [`clear`](#set·clear) * [`difference`](#set·difference) * [`discard`](#set·discard) @@ -3911,6 +3911,21 @@ x # set([1, 3]) x.remove(2) # error: element not found ``` + +### set·symmetric_difference + +`S.symmetric_difference(y)` returns a new set which includes all items in S and y, except for items which are in both. + +y can be any type of iterable (set, list, tuple). + +The symmetric difference between two sets can also be expressed using the `^` operator. + +```python +x = set([1, 2, 3]) +x.symmetric_difference([3, 4, 5]) # set([1, 2, 4, 5]) +x ^ set([2, 3, 4]) # set([1, 4]) +``` + ### set·union diff --git a/starlark/library.go b/starlark/library.go index 1da3910d..f74eee7a 100644 --- a/starlark/library.go +++ b/starlark/library.go @@ -2241,7 +2241,7 @@ func set_intersection(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, return diff, nil } -// https://github.com/google/starlark-go/blob/master/doc/spec.md#set_intersection. +// https://github.com/google/starlark-go/blob/master/doc/spec.md#set_issubset. func set_issubset(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { var other Iterable if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0, &other); err != nil { @@ -2256,7 +2256,7 @@ func set_issubset(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, err return Bool(diff), nil } -// https://github.com/google/starlark-go/blob/master/doc/spec.md#set_intersection. +// https://github.com/google/starlark-go/blob/master/doc/spec.md#set_issuperset. func set_issuperset(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { var other Iterable if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0, &other); err != nil { From 20837bccb33641685533471f380b525c0fffccaf Mon Sep 17 00:00:00 2001 From: Sam Wheating Date: Thu, 7 Sep 2023 09:41:09 -0700 Subject: [PATCH 5/8] go fmt --- starlark/library.go | 20 ++++++++++---------- starlark/value.go | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/starlark/library.go b/starlark/library.go index f74eee7a..03ea8f2d 100644 --- a/starlark/library.go +++ b/starlark/library.go @@ -140,17 +140,17 @@ var ( } setMethods = map[string]*Builtin{ - "add": NewBuiltin("add", set_add), - "clear": NewBuiltin("clear", set_clear), - "difference": NewBuiltin("difference", set_difference), - "discard": NewBuiltin("discard", set_discard), - "intersection": NewBuiltin("intersection", set_intersection), - "issubset": NewBuiltin("issubset", set_issubset), - "issuperset": NewBuiltin("issuperset", set_issuperset), - "pop": NewBuiltin("pop", set_pop), - "remove": NewBuiltin("remove", set_remove), + "add": NewBuiltin("add", set_add), + "clear": NewBuiltin("clear", set_clear), + "difference": NewBuiltin("difference", set_difference), + "discard": NewBuiltin("discard", set_discard), + "intersection": NewBuiltin("intersection", set_intersection), + "issubset": NewBuiltin("issubset", set_issubset), + "issuperset": NewBuiltin("issuperset", set_issuperset), + "pop": NewBuiltin("pop", set_pop), + "remove": NewBuiltin("remove", set_remove), "symmetric_difference": NewBuiltin("symmetric_difference", set_symmetric_difference), - "union": NewBuiltin("union", set_union), + "union": NewBuiltin("union", set_union), } ) diff --git a/starlark/value.go b/starlark/value.go index e290a913..76b2dcfd 100644 --- a/starlark/value.go +++ b/starlark/value.go @@ -1207,7 +1207,7 @@ func (s *Set) isSuperset(other Iterator) (bool, error) { return false, err } if !found { - return false, nil + return false, nil } } return true, nil From 059c54998e0110d1a3a89daf6e4ebf5ef506b0b3 Mon Sep 17 00:00:00 2001 From: Sam Wheating Date: Fri, 8 Sep 2023 16:48:08 -0700 Subject: [PATCH 6/8] Requested documentation fixes --- doc/spec.md | 48 ++++++++++++++---------------------------------- 1 file changed, 14 insertions(+), 34 deletions(-) diff --git a/doc/spec.md b/doc/spec.md index 92f7c09d..eb8e1610 100644 --- a/doc/spec.md +++ b/doc/spec.md @@ -2005,6 +2005,11 @@ which breaks several mathematical identities. For example, if `x` is a `NaN` value, the comparisons `x < y`, `x == y`, and `x > y` all yield false for all values of `y`. +When used to compare two `set` objects, the `<=`, and `>=` operators will report +whether one set is a subset or superset of another. Similarly, using `<` or `>` will +report whether a set is a proper subset or superset of another, thus `x > y` is +equivalent to `x >= y and x != y`. + Applications may define additional types that support ordered comparison. @@ -2056,10 +2061,6 @@ Sets set & set # set intersection set ^ set # set symmetric difference set - set # set difference - set >= set # superset - set > set # proper superset - set <= set # subset - set < set # proper subset Dict @@ -3802,16 +3803,13 @@ x # set([]) ### set·difference -`S.difference(y)` returns a new set which includes all items in S which are not in y. - -y can be any type of iterable (set, list, tuple). +`S.difference(y)` returns a new set into which have been inserted all the elements of set S which are not in y. -The difference between two sets can also be expressed using the `-` operator. +y can be any type of iterable (e.g. set, list, tuple). ```python x = set([1, 2, 3]) -x.difference([3, 4, 5]) # set([1,2]) -x - set([1, 2]) # set([3]) +x.difference([3, 4, 5]) # set([1, 2]) ``` @@ -3833,16 +3831,13 @@ x # set([1, 3]) ### set·intersection -`S.intersection(y)` returns a new set which includes all items in S which are also in y. +`S.intersection(y)` returns a new set into which have been inserted all the elements of set S which are also in y. -y can be any type of iterable (set, list, tuple). - -The difference between two sets can also be expressed using the `&` operator. +y can be any type of iterable (e.g. set, list, tuple). ```python x = set([1, 2, 3]) x.intersection([3, 4, 5]) # set([3]) -x & set([1, 2]) # set([1,2]) ``` @@ -3850,18 +3845,12 @@ x & set([1, 2]) # set([1,2]) `S.issubset(y)` returns True if all items in S are also in y, otherwise it returns False. -y can be any type of iterable (set, list, tuple). - -The `<=` operator can be used to determine if a given set is a subset of another. - -The `<` operator can be used to determine if a given set is a proper subset of another, which is equivalent to `x <= y and x != y` +y can be any type of iterable (e.g. set, list, tuple). ```python x = set([1, 2]) x.issubset([1, 2, 3]) # True x.issubset([1, 3, 4]) # False -x <= set([1, 2]) # True -x < set([1, 2]) # False ``` @@ -3869,18 +3858,12 @@ x < set([1, 2]) # False `S.issuperset(y)` returns True if all items in y are also in S, otherwise it returns False. -y can be any type of iterable (set, list, tuple). - -The `>=` operator can be used to determine if a given set is a superset of another. - -The `>` operator can be used to determine if a given set is a proper superset of another, which is equivalent to `x >= y and x != y` +y can be any type of iterable (e.g. set, list, tuple). ```python x = set([1, 2, 3]) x.issuperset([1, 2]) # True x.issuperset([1, 3, 4]) # False -x >= set([1, 2, 3]) # True -x > set([1, 2]) # False ``` @@ -3914,16 +3897,13 @@ x.remove(2) # error: element not found ### set·symmetric_difference -`S.symmetric_difference(y)` returns a new set which includes all items in S and y, except for items which are in both. - -y can be any type of iterable (set, list, tuple). +`S.symmetric_difference(y)` creates a new set into which is inserted all of the items which are in S but not y, followed by all of the items which are in y but not S. -The symmetric difference between two sets can also be expressed using the `^` operator. +y can be any type of iterable (e.g. set, list, tuple). ```python x = set([1, 2, 3]) x.symmetric_difference([3, 4, 5]) # set([1, 2, 4, 5]) -x ^ set([2, 3, 4]) # set([1, 4]) ``` From de4656a35dcc683761ee4eda93c2fec450330f2d Mon Sep 17 00:00:00 2001 From: Sam Wheating Date: Mon, 11 Sep 2023 16:43:55 -0700 Subject: [PATCH 7/8] Changes requested in code review --- starlark/eval.go | 2 +- starlark/library.go | 6 ++--- starlark/value.go | 65 ++++++++++++++++++++++++++------------------- 3 files changed, 41 insertions(+), 32 deletions(-) diff --git a/starlark/eval.go b/starlark/eval.go index fe005691..3ab08e47 100644 --- a/starlark/eval.go +++ b/starlark/eval.go @@ -1119,7 +1119,7 @@ func Binary(op syntax.Token, x, y Value) (Value, error) { if y, ok := y.(*Set); ok { iter := y.Iterate() defer iter.Done() - return x.symmetricDifference(iter) + return x.SymmetricDifference(iter) } } diff --git a/starlark/library.go b/starlark/library.go index 03ea8f2d..ef032ee2 100644 --- a/starlark/library.go +++ b/starlark/library.go @@ -2249,7 +2249,7 @@ func set_issubset(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, err } iter := other.Iterate() defer iter.Done() - diff, err := b.Receiver().(*Set).isSubset(iter) + diff, err := b.Receiver().(*Set).IsSubset(iter) if err != nil { return nil, nameErr(b, err) } @@ -2264,7 +2264,7 @@ func set_issuperset(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, e } iter := other.Iterate() defer iter.Done() - diff, err := b.Receiver().(*Set).isSuperset(iter) + diff, err := b.Receiver().(*Set).IsSuperset(iter) if err != nil { return nil, nameErr(b, err) } @@ -2327,7 +2327,7 @@ func set_symmetric_difference(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) } iter := other.Iterate() defer iter.Done() - diff, err := b.Receiver().(*Set).symmetricDifference(iter) + diff, err := b.Receiver().(*Set).SymmetricDifference(iter) if err != nil { return nil, nameErr(b, err) } diff --git a/starlark/value.go b/starlark/value.go index 76b2dcfd..d92f103f 100644 --- a/starlark/value.go +++ b/starlark/value.go @@ -1137,23 +1137,25 @@ func (x *Set) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error case syntax.GE: // superset iter := y.Iterate() defer iter.Done() - return x.isSuperset(iter) + return x.IsSuperset(iter) case syntax.LE: // subset iter := y.Iterate() defer iter.Done() - return x.isSubset(iter) + return x.IsSubset(iter) case syntax.GT: // proper superset - eq, err := setsEqual(x, y, depth) + if x.Len() == y.Len() { + return false, nil + } iter := y.Iterate() defer iter.Done() - ok, err := x.isSuperset(iter) - return ok && !eq, err + return x.IsSuperset(iter) case syntax.LT: // proper subset - eq, _ := setsEqual(x, y, depth) + if x.Len() == y.Len() { + return false, nil + } iter := y.Iterate() defer iter.Done() - ok, err := x.isSubset(iter) - return ok && !eq, err + return x.IsSubset(iter) default: return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y.Type()) } @@ -1171,11 +1173,28 @@ func setsEqual(x, y *Set, depth int) (bool, error) { return true, nil } -func (s *Set) Union(iter Iterator) (Value, error) { +func setFromIterator(iter Iterator) (*Set, error) { + var x Value + set := new(Set) + for iter.Next(&x) { + err := set.Insert(x) + if err != nil { + return set, err + } + } + return set, nil +} + +func (s *Set) clone() *Set { set := new(Set) for e := s.ht.head; e != nil; e = e.next { set.Insert(e.key) // can't fail } + return set +} + +func (s *Set) Union(iter Iterator) (Value, error) { + set := s.clone() var x Value for iter.Next(&x) { if err := set.Insert(x); err != nil { @@ -1186,10 +1205,7 @@ func (s *Set) Union(iter Iterator) (Value, error) { } func (s *Set) Difference(other Iterator) (Value, error) { - diff := new(Set) - for e := s.ht.head; e != nil; e = e.next { - diff.Insert(e.key) // can't fail - } + diff := s.clone() var x Value for other.Next(&x) { if _, err := diff.Delete(x); err != nil { @@ -1199,7 +1215,7 @@ func (s *Set) Difference(other Iterator) (Value, error) { return diff, nil } -func (s *Set) isSuperset(other Iterator) (bool, error) { +func (s *Set) IsSuperset(other Iterator) (bool, error) { var x Value for other.Next(&x) { found, err := s.Has(x) @@ -1213,18 +1229,14 @@ func (s *Set) isSuperset(other Iterator) (bool, error) { return true, nil } -func (s *Set) isSubset(other Iterator) (bool, error) { - var x Value - otherset := new(Set) - for other.Next(&x) { - err := otherset.Insert(x) - if err != nil { - return false, err - } +func (s *Set) IsSubset(other Iterator) (bool, error) { + otherset, err := setFromIterator(other) + if err != nil { + return false, err } iter := s.Iterate() defer iter.Done() - return otherset.isSuperset(iter) + return otherset.IsSuperset(iter) } func (s *Set) Intersection(other Iterator) (Value, error) { @@ -1245,11 +1257,8 @@ func (s *Set) Intersection(other Iterator) (Value, error) { return intersect, nil } -func (s *Set) symmetricDifference(other Iterator) (Value, error) { - diff := new(Set) - for e := s.ht.head; e != nil; e = e.next { - diff.Insert(e.key) // can't fail - } +func (s *Set) SymmetricDifference(other Iterator) (Value, error) { + diff := s.clone() var x Value for other.Next(&x) { found, err := diff.Delete(x) From 0b765b6bb08b0f6f0ca0a499478d680e1b5edf3f Mon Sep 17 00:00:00 2001 From: Sam Wheating Date: Mon, 11 Sep 2023 16:55:44 -0700 Subject: [PATCH 8/8] use fast checks based on set length in set.CompareSameType --- starlark/value.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/starlark/value.go b/starlark/value.go index d92f103f..db9ba113 100644 --- a/starlark/value.go +++ b/starlark/value.go @@ -1135,22 +1135,28 @@ func (x *Set) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error ok, err := setsEqual(x, y, depth) return !ok, err case syntax.GE: // superset + if x.Len() < y.Len() { + return false, nil + } iter := y.Iterate() defer iter.Done() return x.IsSuperset(iter) case syntax.LE: // subset + if x.Len() > y.Len() { + return false, nil + } iter := y.Iterate() defer iter.Done() return x.IsSubset(iter) case syntax.GT: // proper superset - if x.Len() == y.Len() { + if x.Len() <= y.Len() { return false, nil } iter := y.Iterate() defer iter.Done() return x.IsSuperset(iter) case syntax.LT: // proper subset - if x.Len() == y.Len() { + if x.Len() >= y.Len() { return false, nil } iter := y.Iterate()