Skip to content

Commit 53cdb11

Browse files
authored
Add list_cat, list_concat, list_repeat (#942)
* Add list_cat, list_concat * Add list_repeat * docs: add examples for list_cat, list_concat, and list_repeat functions * Amend list_repeat code example - literal * Amend list_ to array_ in documentation
1 parent e3e55b7 commit 53cdb11

File tree

3 files changed

+68
-0
lines changed

3 files changed

+68
-0
lines changed

docs/source/user-guide/common-operations/expressions.rst

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,35 @@ This function returns an integer indicating the total number of elements in the
110110
111111
In this example, the `num_elements` column will contain `3` for both rows.
112112

113+
To concatenate two or more arrays, you can use either :py:func:`datafusion.functions.array_cat` or :py:func:`datafusion.functions.array_concat`.
114+
These functions return a new array that is the concatenation of the input arrays.
115+
116+
.. ipython:: python
117+
118+
from datafusion import SessionContext, col
119+
from datafusion.functions import array_cat, array_concat
120+
121+
ctx = SessionContext()
122+
df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[4, 5, 6]]})
123+
df.select(array_cat(col("a"), col("b")).alias("concatenated_array"))
124+
125+
In this example, the `concatenated_array` column will contain `[1, 2, 3, 4, 5, 6]`.
126+
127+
To build an array that contains a given element repeated a specified number of times, you can use the function :py:func:`datafusion.functions.array_repeat`.
128+
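This function returns a new array in which the input element appears the requested number of times; when the element is itself an array, the result is a nested array.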
129+
130+
.. ipython:: python
131+
132+
from datafusion import SessionContext, col, literal
133+
from datafusion.functions import array_repeat
134+
135+
ctx = SessionContext()
136+
df = ctx.from_pydict({"a": [[1, 2, 3]]})
137+
df.select(array_repeat(col("a"), literal(2)).alias("repeated_array"))
138+
139+
In this example, the `repeated_array` column will contain `[[1, 2, 3], [1, 2, 3]]`.
140+
141+
113142
Structs
114143
-------
115144

python/datafusion/functions.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@
147147
"length",
148148
"levenshtein",
149149
"list_append",
150+
"list_cat",
151+
"list_concat",
150152
"list_dims",
151153
"list_distinct",
152154
"list_element",
@@ -162,6 +164,7 @@
162164
"list_prepend",
163165
"list_push_back",
164166
"list_push_front",
167+
"list_repeat",
165168
"list_remove",
166169
"list_remove_all",
167170
"list_remove_n",
@@ -1145,6 +1148,22 @@ def array_distinct(array: Expr) -> Expr:
11451148
return Expr(f.array_distinct(array.expr))
11461149

11471150

1151+
def list_cat(*args: Expr) -> Expr:
1152+
"""Concatenates the input arrays.
1153+
1154+
This is an alias for :py:func:`array_concat` and :py:func:`array_cat`.
1155+
"""
1156+
return array_concat(*args)
1157+
1158+
1159+
def list_concat(*args: Expr) -> Expr:
1160+
"""Concatenates the input arrays.
1161+
1162+
This is an alias for :py:func:`array_concat` and :py:func:`array_cat`.
1163+
"""
1164+
return array_concat(*args)
1165+
1166+
11481167
def list_distinct(array: Expr) -> Expr:
11491168
"""Returns distinct values from the array after removing duplicates.
11501169
@@ -1369,6 +1388,14 @@ def array_repeat(element: Expr, count: Expr) -> Expr:
13691388
return Expr(f.array_repeat(element.expr, count.expr))
13701389

13711390

1391+
def list_repeat(element: Expr, count: Expr) -> Expr:
1392+
"""Returns an array containing ``element`` ``count`` times.
1393+
1394+
This is an alias for :py:func:`array_repeat`.
1395+
"""
1396+
return array_repeat(element, count)
1397+
1398+
13721399
def array_replace(array: Expr, from_val: Expr, to_val: Expr) -> Expr:
13731400
"""Replaces the first occurrence of ``from_val`` with ``to_val``."""
13741401
return Expr(f.array_replace(array.expr, from_val.expr, to_val.expr))

python/tests/test_functions.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,14 @@ def py_flatten(arr):
291291
lambda col: f.array_cat(col, col),
292292
lambda data: [np.concatenate([arr, arr]) for arr in data],
293293
],
294+
[
295+
lambda col: f.list_cat(col, col),
296+
lambda data: [np.concatenate([arr, arr]) for arr in data],
297+
],
298+
[
299+
lambda col: f.list_concat(col, col),
300+
lambda data: [np.concatenate([arr, arr]) for arr in data],
301+
],
294302
[
295303
lambda col: f.array_dims(col),
296304
lambda data: [[len(r)] for r in data],
@@ -439,6 +447,10 @@ def py_flatten(arr):
439447
lambda col: f.array_repeat(col, literal(2)),
440448
lambda data: [[arr] * 2 for arr in data],
441449
],
450+
[
451+
lambda col: f.list_repeat(col, literal(2)),
452+
lambda data: [[arr] * 2 for arr in data],
453+
],
442454
[
443455
lambda col: f.array_replace(col, literal(3.0), literal(4.0)),
444456
lambda data: [py_arr_replace(arr, 3.0, 4.0, 1) for arr in data],

0 commit comments

Comments
 (0)