Skip to content

Commit 8c76795

Browse files
uros-db authored and HyukjinKwon committed
[SPARK-54142][GEO][SQL][PYTHON] Implement the st_srid function in Scala and PySpark
### What changes were proposed in this pull request?
Implement the `st_srid` function in Scala and PySpark API.

### Why are the changes needed?
Expand API support for the `ST_Srid` expression.

### Does this PR introduce _any_ user-facing change?
Yes, the new function is now available in Scala and PySpark API.

### How was this patch tested?
Added appropriate Scala function unit tests:
- `STFunctionsSuite`

Added appropriate PySpark function unit tests:
- `test_functions`

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes #52841 from uros-db/geo-ST_Srid-scala.

Authored-by: Uros Bojanic <uros.bojanic@databricks.com>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
1 parent 78e9f70 commit 8c76795

File tree

7 files changed

+74
-0
lines changed

7 files changed

+74
-0
lines changed

python/docs/source/reference/pyspark.sql/functions.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -660,6 +660,7 @@ Geospatial ST Functions
660660
st_asbinary
661661
st_geogfromwkb
662662
st_geomfromwkb
663+
st_srid
663664

664665

665666
UDF, UDTF and UDT

python/pyspark/sql/connect/functions/builtin.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4807,6 +4807,13 @@ def st_geomfromwkb(wkb: "ColumnOrName") -> Column:
48074807
st_geomfromwkb.__doc__ = pysparkfuncs.st_geomfromwkb.__doc__
48084808

48094809

4810+
def st_srid(geo: "ColumnOrName") -> Column:
4811+
return _invoke_function_over_columns("st_srid", geo)
4812+
4813+
4814+
st_srid.__doc__ = pysparkfuncs.st_srid.__doc__
4815+
4816+
48104817
# Call Functions
48114818

48124819

python/pyspark/sql/functions/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,7 @@
526526
"st_asbinary",
527527
"st_geogfromwkb",
528528
"st_geomfromwkb",
529+
"st_srid",
529530
# Call Functions
530531
"call_udf",
531532
"pandas_udf",

python/pyspark/sql/functions/builtin.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25971,6 +25971,31 @@ def st_geomfromwkb(wkb: "ColumnOrName") -> Column:
2597125971
return _invoke_function_over_columns("st_geomfromwkb", wkb)
2597225972

2597325973

25974+
@_try_remote_functions
25975+
def st_srid(geo: "ColumnOrName") -> Column:
25976+
"""Returns the SRID of the input GEOGRAPHY or GEOMETRY value.
25977+
25978+
.. versionadded:: 4.1.0
25979+
25980+
Parameters
25981+
----------
25982+
geo : :class:`~pyspark.sql.Column` or str
25983+
A geospatial value, either a GEOGRAPHY or a GEOMETRY.
25984+
25985+
Examples
25986+
--------
25987+
>>> from pyspark.sql import functions as sf
25988+
>>> df = spark.createDataFrame([(bytes.fromhex('0101000000000000000000F03F0000000000000040'),)], ['wkb']) # noqa
25989+
>>> df.select(sf.st_srid(sf.st_geogfromwkb('wkb')).alias('result')).collect()
25990+
[Row(result=4326)]
25991+
>>> from pyspark.sql import functions as sf
25992+
>>> df = spark.createDataFrame([(bytes.fromhex('0101000000000000000000F03F0000000000000040'),)], ['wkb']) # noqa
25993+
>>> df.select(sf.st_srid(sf.st_geomfromwkb('wkb')).alias('result')).collect()
25994+
[Row(result=0)]
25995+
"""
25996+
return _invoke_function_over_columns("st_srid", geo)
25997+
25998+
2597425999
# Call Functions
2597526000

2597626001

python/pyspark/sql/tests/test_functions.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2822,6 +2822,21 @@ def test_st_asbinary(self):
28222822
)
28232823
self.assertEqual(results, [expected])
28242824

2825+
def test_st_srid(self):
2826+
df = self.spark.createDataFrame(
2827+
[(bytes.fromhex("0101000000000000000000F03F0000000000000040"),)],
2828+
["wkb"],
2829+
)
2830+
results = df.select(
2831+
F.st_srid(F.st_geogfromwkb("wkb")),
2832+
F.st_srid(F.st_geomfromwkb("wkb")),
2833+
).collect()
2834+
expected = Row(
2835+
4326,
2836+
0,
2837+
)
2838+
self.assertEqual(results, [expected])
2839+
28252840

28262841
class FunctionsTests(ReusedSQLTestCase, FunctionsTestsMixin):
28272842
pass

sql/api/src/main/scala/org/apache/spark/sql/functions.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9171,6 +9171,15 @@ object functions {
91719171
def st_geomfromwkb(wkb: Column): Column =
91729172
Column.fn("st_geomfromwkb", wkb)
91739173

9174+
/**
9175+
* Returns the SRID of the input GEOGRAPHY or GEOMETRY value.
9176+
*
9177+
* @group st_funcs
9178+
* @since 4.1.0
9179+
*/
9180+
def st_srid(geo: Column): Column =
9181+
Column.fn("st_srid", geo)
9182+
91749183
//////////////////////////////////////////////////////////////////////////////////////////////
91759184
// Scala UDF functions
91769185
//////////////////////////////////////////////////////////////////////////////////////////////

sql/core/src/test/scala/org/apache/spark/sql/STFunctionsSuite.scala

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,20 @@ class STFunctionsSuite extends QueryTest with SharedSparkSession {
4242
"0101000000000000000000f03f0000000000000040"))
4343
}
4444

45+
/** ST accessor expressions. */
46+
47+
test("st_srid") {
48+
// Test data: Well-Known Binary (WKB) representations.
49+
val df = Seq[(String)](
50+
(
51+
"0101000000000000000000f03f0000000000000040"
52+
)).toDF("wkb")
53+
// ST_GeogFromWKB/ST_GeomFromWKB and ST_Srid.
54+
checkAnswer(
55+
df.select(
56+
st_srid(st_geogfromwkb(unhex($"wkb"))).as("col0"),
57+
st_srid(st_geomfromwkb(unhex($"wkb"))).as("col1")),
58+
Row(4326, 0))
59+
}
60+
4561
}

0 commit comments

Comments
 (0)