Skip to content

Commit 1448698

Browse files
Add unique functions to algorithms.nim (#645)
These functions are similar to, but not as fully featured as, `numpy.unique`. They lack a way to return the counts or the indexes of the unique elements. However, they make it possible to (optionally) sort the output, or to use a more efficient algorithm if the input is already sorted.
1 parent 05ae049 commit 1448698

File tree

2 files changed

+98
-0
lines changed

2 files changed

+98
-0
lines changed

src/arraymancer/tensor/algorithms.nim

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,80 @@ proc argsort*[T](t: Tensor[T], order = SortOrder.Ascending, toCopy = false): Ten
5757
result = newTensorUninit[int](t.shape)
5858
for i in 0 ..< t.size:
5959
result[i] = tups[i][1]
60+
61+
proc unique*[T](t: Tensor[T], isSorted=false): Tensor[T] =
  ## Return a new Tensor with the unique elements of the input Tensor in the
  ## order they first appear
  ##
  ## Note that this is the *"unsorted"* version of this procedure which returns
  ## the unique values in the order in which they first appear on the input.
  ## Do not get confused by the `isSorted` argument which is not used to sort
  ## the output, but to make the algorithm more efficient when the input tensor
  ## is already sorted.
  ##
  ## There is another version of this procedure which gets an `order` argument
  ## that lets you sort the output (in ascending or descending order).
  ##
  ## Inputs:
  ##   - t: The input Tensor
  ##   - isSorted: Set this to `true` if the input tensor is already sorted,
  ##               in order to use a more efficient algorithm for finding the
  ##               unique elements of the input Tensor. Be careful however when
  ##               using this option, since if the input tensor is not really
  ##               sorted, the output will be wrong.
  ##
  ## Result:
  ##   - A new Tensor with the unique elements of the input Tensor in the order
  ##     in which they first appear on the input Tensor.
  ##
  ## Examples:
  ## ```nim
  ## let
  ##   dup = [1, 3, 2, 4, 1, 8, 2, 1, 4].toTensor
  ## assert dup.unique == [1, 3, 2, 4, 8].toTensor
  ##
  ## # Use `isSorted = true` only if the input tensor is already sorted.
  ## # The unique values then come out in that same sorted order.
  ## assert dup.sorted.unique(isSorted = true) == [1, 2, 3, 4, 8].toTensor
  ## ```

  if t.is_C_contiguous:
    # The raw data is read as one flat run of `t.size` elements.
    # Note that since deduplicate returns a new sequence, it is safe to apply it
    # to a view of the raw data of the input tensor
    toOpenArray(t.toUnsafeView, 0, t.size - 1).deduplicate(isSorted = isSorted).toTensor
  else:
    # Clone the tensor in order to make it C contiguous and then make it unique
    unique(t.clone(), isSorted = isSorted)
102+
103+
proc unique*[T](t: Tensor[T], order: SortOrder): Tensor[T] =
  ## Build a new Tensor that holds every distinct value of the input Tensor
  ## exactly once, arranged according to `order`
  ##
  ## This is the "sorted" flavour of `unique`. The other overload, which takes
  ## an `isSorted` argument instead of `order`, keeps the distinct values in
  ## the order in which they first appear on the input.
  ##
  ## Inputs:
  ##   - t: The input Tensor
  ##   - order: The direction in which the output is sorted
  ##            (`SortOrder.Ascending` or `SortOrder.Descending`)
  ##
  ## Result:
  ##   - A new Tensor with the unique elements of the input Tensor, sorted in
  ##     the requested order.
  ##
  ## Examples:
  ## ```nim
  ## let
  ##   dup = [1, 3, 2, 4, 1, 8, 2, 1, 4].toTensor
  ##   unique_ascending_sort = dup.unique(order = SortOrder.Ascending)
  ##   unique_descending_sort = dup.unique(order = SortOrder.Descending)
  ## assert unique_ascending_sort == [1, 2, 3, 4, 8].toTensor
  ## assert unique_descending_sort == [8, 4, 3, 2, 1].toTensor
  ## ```

  if not t.is_C_contiguous:
    # The raw data cannot be read as one flat run here, so sort the tensor
    # itself and let the `isSorted` overload drop the adjacent duplicates
    result = sorted(t, order = order).unique(isSorted = true)
  else:
    # `sorted` hands back a brand new sequence, which makes it safe to feed it
    # a view of the raw data of the input tensor
    let ordered = sorted(toOpenArray(t.toUnsafeView, 0, t.size - 1),
                         order = order)
    # Equal values are adjacent once sorted, so the fast path applies
    result = ordered.deduplicate(isSorted = true).toTensor

tests/tensor/test_algorithms.nim

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,24 @@ suite "[Core] Testing algorithm functions":
5050
let idxSorted = t.argsort(order = SortOrder.Descending)
5151
check idxSorted == exp
5252
check t[idxSorted] == @[7, 4, 3, 2, 1].toTensor()
53+
54+
test "Unique":
  block:
    let
      dup = [1, 3, 2, 4, 1, 8, 2, 1, 4].toTensor
      unique_unsorted = dup.unique
      # Already-unique, sorted inputs: `isSorted = true` must leave them as is
      unique_presorted_ascending = sorted(dup.unique).unique(isSorted = true)
      unique_presorted_descending = sorted(dup.unique, order = SortOrder.Descending).unique(isSorted = true)
      # Sorted inputs that still contain duplicates: this is the case in which
      # the `isSorted = true` fast path actually has something to remove
      unique_sorted_dup_ascending = sorted(dup).unique(isSorted = true)
      unique_sorted_dup_descending = sorted(dup, order = SortOrder.Descending).unique(isSorted = true)
      unique_sorted_ascending = dup.unique(order = SortOrder.Ascending)
      unique_sorted_descending = dup.unique(order = SortOrder.Descending)
      # Strided slice ([1, 2, 1, 2, 4]) to exercise the non C contiguous branch
      dup_not_C_continuous = dup[_ | 2]
      unique_not_c_continuous = dup_not_C_continuous.unique
      unique_sorted_not_c_continuous = dup_not_C_continuous.unique(order = SortOrder.Descending)

    check unique_unsorted == [1, 3, 2, 4, 8].toTensor
    check unique_presorted_ascending == [1, 2, 3, 4, 8].toTensor
    check unique_presorted_descending == [8, 4, 3, 2, 1].toTensor
    check unique_sorted_dup_ascending == [1, 2, 3, 4, 8].toTensor
    check unique_sorted_dup_descending == [8, 4, 3, 2, 1].toTensor
    check unique_sorted_ascending == [1, 2, 3, 4, 8].toTensor
    check unique_sorted_descending == [8, 4, 3, 2, 1].toTensor
    check unique_not_c_continuous == [1, 2, 4].toTensor
    check unique_sorted_not_c_continuous == [4, 2, 1].toTensor

0 commit comments

Comments
 (0)