Skip to content

Commit

Permalink
Closes #2918 add isempty for pdarray (#2933)
Browse files Browse the repository at this point in the history
Co-authored-by: Amanda Potts <ajpotts@users.noreply.github.com>
  • Loading branch information
ajpotts and ajpotts authored Jan 31, 2024
1 parent d682e55 commit ab0bb3f
Show file tree
Hide file tree
Showing 6 changed files with 216 additions and 0 deletions.
78 changes: 78 additions & 0 deletions PROTO_tests/tests/string_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,84 @@ def test_string_isdigit(self):

assert example2.isdigit().to_list() == expected

def test_string_empty(self):
    """Check that Strings.isempty() is True only for zero-length strings."""
    populated = ak.array([f"Strings {i}" for i in range(3)])
    blanks = ak.array([""] * 3)
    combined = ak.concatenate([populated, blanks])

    assert combined.isempty().to_list() == [False] * 3 + [True] * 3

    # Only the first sample has length zero; whitespace, control characters,
    # digits and non-ASCII characters must all be reported as non-empty.
    samples = [
        "",
        "string1",
        "stringA",
        "String",
        "12345",
        "Hello\tWorld",
        " ",
        "\n",
        "3.14",
        "\u0030",
        "\u00B2",
    ]
    example2 = ak.array(samples)

    expected = [True] + [False] * (len(samples) - 1)

    assert example2.isempty().to_list() == expected

def test_string_empty_percent_prefix(self):
    """
    Check Strings.isempty() when the non-empty inputs start with a "%" character.

    This test needs its own name: a second method called ``test_string_empty``
    in the same class body would replace the first one, so only one of the two
    tests would ever be collected and run.
    """
    not_empty = ak.array([f"%Strings {i}" for i in range(3)])
    empty = ak.array(["" for i in range(3)])
    example = ak.concatenate([not_empty, empty])

    assert example.isempty().to_list() == [False, False, False, True, True, True]

    example2 = ak.array(
        [
            "",
            "string1",
            "stringA",
            "String",
            "12345",
            "Hello\tWorld",
            " ",
            "\n",
            "3.14",
            "\u0030",
            "\u00B2",
        ]
    )

    # Only the first element is the empty string; everything else, including
    # whitespace-only entries, has non-zero length.
    expected = [
        True,
        False,
        False,
        False,
        False,
        False,
        False,
        False,
        False,
        False,
        False,
    ]

    assert example2.isempty().to_list() == expected

def test_where(self):
revs = ak.arange(10) % 2 == 0
s1 = ak.array([f"str {i}" for i in range(10)])
Expand Down
39 changes: 39 additions & 0 deletions arkouda/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,45 @@ def isdigit(self) -> pdarray:
)
)

def isempty(self) -> pdarray:
"""
Returns a boolean pdarray where index i indicates whether string i of the
Strings is empty.

True for elements that are the empty string, False otherwise

Returns
-------
pdarray, bool
True for elements that are the empty string, False otherwise

Raises
------
RuntimeError
Raised if there is a server-side error thrown

See Also
--------
Strings.islower
Strings.isupper
Strings.istitle

Examples
--------
>>> not_empty = ak.array([f'Strings {i}' for i in range(3)])
>>> empty = ak.array(['' for i in range(3)])
>>> strings = ak.concatenate([not_empty, empty])
>>> strings
array(['Strings 0', 'Strings 1', 'Strings 2', '', '', ''])
>>> strings.isempty()
array([False False False True True True])
"""
return create_pdarray(
generic_msg(
cmd="checkChars", args={"subcmd": "isempty", "objType": self.objType, "obj": self.entry}
)
)

@typechecked
def strip(self, chars: Optional[Union[bytes, str_scalars]] = "") -> Strings:
"""
Expand Down
4 changes: 4 additions & 0 deletions src/SegmentedComputation.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ module SegmentedComputation {
StringIsAlphaNumeric,
StringIsAlphabetic,
StringIsDigit,
StringIsEmpty,
}

proc computeOnSegments(segments: [?D] int, ref values: [?vD] ?t, param function: SegFunction, type retType, const strArg: string = "") throws {
Expand Down Expand Up @@ -120,6 +121,9 @@ module SegmentedComputation {
when SegFunction.StringIsDigit {
agg.copy(res[i], stringIsDigit(values, start..#len));
}
when SegFunction.StringIsEmpty {
agg.copy(res[i], stringIsEmpty(values, start..#len));
}
otherwise {
compilerError("Unrecognized segmented function");
}
Expand Down
4 changes: 4 additions & 0 deletions src/SegmentedMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,10 @@ module SegmentedMsg {
truth.a = strings.isdigit();
repMsg = "created "+st.attrib(rname);
}
when "isempty" {
truth.a = strings.isempty();
repMsg = "created "+st.attrib(rname);
}
otherwise {
var errorMsg = notImplementedError(pn, "%s".doFormat(subcmd));
smLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg);
Expand Down
15 changes: 15 additions & 0 deletions src/SegmentedString.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,14 @@ module SegmentedString {
return computeOnSegments(offsets.a, values.a, SegFunction.StringIsDigit, bool);
}

/*
Tests each string of the SegString for emptiness.

Delegates to computeOnSegments over offsets.a and values.a, selecting
SegFunction.StringIsEmpty and requesting bool results.

:returns: [domain] bool where index i indicates whether string i of the SegString is empty
*/
proc isempty() throws {
return computeOnSegments(offsets.a, values.a, SegFunction.StringIsEmpty, bool);
}

proc bytesToUintArr(const max_bytes:int, lens: [?D] ?t, st) throws {
// bytes contained in strings < 128 bits, so concatenating is better than the hash
ref off = offsets.a;
Expand Down Expand Up @@ -1479,6 +1487,13 @@ module SegmentedString {
return interpretAsString(values, rng, borrow=true).isDigit();
}

/*
The SegFunction called by computeOnSegments for isempty.

Interprets the `rng` portion of `values` as a string and returns the result
of calling isEmpty() on it.

NOTE(review): borrow=true presumably avoids copying the underlying bytes;
confirm against interpretAsString.
*/
inline proc stringIsEmpty(ref values, rng) throws {
return interpretAsString(values, rng, borrow=true).isEmpty();
}

inline proc stringBytesToUintArr(ref values, rng) throws {
var localSlice = new lowLevelLocalizingSlice(values, rng);
return | reduce [i in 0..#rng.size] (localSlice.ptr(i):uint)<<(8*(rng.size-1-i));
Expand Down
76 changes: 76 additions & 0 deletions tests/string_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,82 @@ def test_string_isdigit(self):

self.assertListEqual(example2.isdigit().to_list(), expected)

def test_string_empty(self):
    """Check that Strings.isempty() is True only for zero-length strings."""
    populated = ak.array([f"Strings {i}" for i in range(3)])
    blanks = ak.array([""] * 3)
    combined = ak.concatenate([populated, blanks])
    self.assertListEqual(combined.isempty().to_list(), [False] * 3 + [True] * 3)

    # Only the first sample has length zero; whitespace, control characters,
    # digits and non-ASCII characters must all be reported as non-empty.
    samples = [
        "",
        "string1",
        "stringA",
        "String",
        "12345",
        "Hello\tWorld",
        " ",
        "\n",
        "3.14",
        "\u0030",
        "\u00B2",
    ]
    example2 = ak.array(samples)

    expected = [True] + [False] * (len(samples) - 1)

    self.assertListEqual(example2.isempty().to_list(), expected)

def test_string_empty_percent_prefix(self):
    """
    Check Strings.isempty() when the non-empty inputs start with a "%" character.

    This test needs its own name: a second method called ``test_string_empty``
    in the same class body would replace the first one, so only one of the two
    tests would ever be collected and run.
    """
    not_empty = ak.array([f"%Strings {i}" for i in range(3)])
    empty = ak.array(["" for i in range(3)])
    example = ak.concatenate([not_empty, empty])
    self.assertListEqual(example.isempty().to_list(), [False, False, False, True, True, True])

    example2 = ak.array(
        [
            "",
            "string1",
            "stringA",
            "String",
            "12345",
            "Hello\tWorld",
            " ",
            "\n",
            "3.14",
            "\u0030",
            "\u00B2",
        ]
    )

    # Only the first element is the empty string; everything else, including
    # whitespace-only entries, has non-zero length.
    expected = [
        True,
        False,
        False,
        False,
        False,
        False,
        False,
        False,
        False,
        False,
        False,
    ]

    self.assertListEqual(example2.isempty().to_list(), expected)

def test_where(self):
revs = ak.arange(10) % 2 == 0
s1 = ak.array([f"str {i}" for i in range(10)])
Expand Down

0 comments on commit ab0bb3f

Please sign in to comment.