Skip to content

Commit 1bfbe0b

Browse files
authored
chore: add experimental blob read and write URL functions (#1308)
* chore: add experimental blob read and write URL functions * fix * fix
1 parent 3b53092 commit 1bfbe0b

File tree

2 files changed

+50
-16
lines changed

2 files changed

+50
-16
lines changed

bigframes/dataframe.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -748,9 +748,8 @@ def _repr_html_(self) -> str:
748748
if df[col].dtype == bigframes.dtypes.OBJ_REF_DTYPE
749749
]
750750
for col in blob_cols:
751-
df[col] = df[col]._apply_unary_op(ops.obj_fetch_metadata_op)
752751
# TODO(garrettwu): Not necessary to get access urls for all the rows. Update once there is a way to get URLs from local data.
753-
df[col] = df[col]._apply_unary_op(ops.ObjGetAccessUrl(mode="R"))
752+
df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True)
754753

755754
# TODO(swast): pass max_columns and get the true column count back. Maybe
756755
# get 1 more column than we have requested so that pandas can add the

bigframes/operations/blob.py

Lines changed: 49 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,11 @@ def version(self) -> bigframes.series.Series:
6666
6767
Returns:
6868
BigFrames Series: Version as string."""
69-
# version must be retrived after fetching metadata
69+
# version must be retrieved after fetching metadata
7070
return self._apply_unary_op(ops.obj_fetch_metadata_op).struct.field("version")
7171

7272
def metadata(self) -> bigframes.series.Series:
73-
"""Retrive the metadata of the Blob.
73+
"""Retrieve the metadata of the Blob.
7474
7575
.. note::
7676
BigFrames Blob is still experimental. It may not work and is subject to change in the future.
@@ -85,7 +85,7 @@ def metadata(self) -> bigframes.series.Series:
8585
return bbq.json_extract(details_json, "$.gcs_metadata").rename("metadata")
8686

8787
def content_type(self) -> bigframes.series.Series:
88-
"""Retrive the content type of the Blob.
88+
"""Retrieve the content type of the Blob.
8989
9090
.. note::
9191
BigFrames Blob is still experimental. It may not work and is subject to change in the future.
@@ -99,7 +99,7 @@ def content_type(self) -> bigframes.series.Series:
9999
)
100100

101101
def md5_hash(self) -> bigframes.series.Series:
102-
"""Retrive the md5 hash of the Blob.
102+
"""Retrieve the md5 hash of the Blob.
103103
104104
.. note::
105105
BigFrames Blob is still experimental. It may not work and is subject to change in the future.
@@ -113,7 +113,7 @@ def md5_hash(self) -> bigframes.series.Series:
113113
)
114114

115115
def size(self) -> bigframes.series.Series:
116-
"""Retrive the file size of the Blob.
116+
"""Retrieve the file size of the Blob.
117117
118118
.. note::
119119
BigFrames Blob is still experimental. It may not work and is subject to change in the future.
@@ -128,7 +128,7 @@ def size(self) -> bigframes.series.Series:
128128
)
129129

130130
def updated(self) -> bigframes.series.Series:
131-
"""Retrive the updated time of the Blob.
131+
"""Retrieve the updated time of the Blob.
132132
133133
.. note::
134134
BigFrames Blob is still experimental. It may not work and is subject to change in the future.
@@ -146,6 +146,46 @@ def updated(self) -> bigframes.series.Series:
146146

147147
return bpd.to_datetime(updated, unit="us", utc=True)
148148

149+
def _get_runtime(
150+
self, mode: str, with_metadata: bool = False
151+
) -> bigframes.series.Series:
152+
"""Retrieve the ObjectRefRuntime as JSON.
153+
154+
Args:
155+
mode (str): mode for the URLs, "R" for read, "RW" for read & write.
156+
with_metadata (bool, default False): whether to fetch the metadata in the ObjectRefRuntime.
157+
158+
Returns:
159+
BigFrames Series: ObjectRefRuntime JSON.
160+
"""
161+
s = self._apply_unary_op(ops.obj_fetch_metadata_op) if with_metadata else self
162+
163+
return s._apply_unary_op(ops.ObjGetAccessUrl(mode=mode))
164+
165+
def read_url(self) -> bigframes.series.Series:
166+
"""Retrieve the read URL of the Blob.
167+
168+
.. note::
169+
BigFrames Blob is still experimental. It may not work and is subject to change in the future.
170+
171+
Returns:
172+
BigFrames Series: Read only URLs."""
173+
return self._get_runtime(mode="R")._apply_unary_op(
174+
ops.JSONValue(json_path="$.access_urls.read_url")
175+
)
176+
177+
def write_url(self) -> bigframes.series.Series:
178+
"""Retrieve the write URL of the Blob.
179+
180+
.. note::
181+
BigFrames Blob is still experimental. It may not work and is subject to change in the future.
182+
183+
Returns:
184+
BigFrames Series: Writable URLs."""
185+
return self._get_runtime(mode="RW")._apply_unary_op(
186+
ops.JSONValue(json_path="$.access_urls.write_url")
187+
)
188+
149189
def display(self, n: int = 3, *, content_type: str = ""):
150190
"""Display the blob content in the IPython Notebook environment. Only works for image type now.
151191
@@ -159,10 +199,7 @@ def display(self, n: int = 3, *, content_type: str = ""):
159199
# col name doesn't matter here. Rename to avoid column name conflicts
160200
df = bigframes.series.Series(self._block).rename("blob_col").head(n).to_frame()
161201

162-
obj_ref_runtime = df["blob_col"]._apply_unary_op(ops.ObjGetAccessUrl(mode="R"))
163-
df["read_url"] = obj_ref_runtime._apply_unary_op(
164-
ops.JSONValue(json_path="$.access_urls.read_url")
165-
)
202+
df["read_url"] = df["blob_col"].blob.read_url()
166203

167204
if content_type:
168205
df["content_type"] = content_type
@@ -231,10 +268,8 @@ def image_blur(
231268
connection=connection,
232269
).udf()
233270

234-
src_rt = bigframes.series.Series(self._block)._apply_unary_op(
235-
ops.ObjGetAccessUrl(mode="R")
236-
)
237-
dst_rt = dst._apply_unary_op(ops.ObjGetAccessUrl(mode="RW"))
271+
src_rt = self._get_runtime(mode="R")
272+
dst_rt = dst.blob._get_runtime(mode="RW")
238273

239274
src_rt = src_rt._apply_unary_op(ops.ToJSONString())
240275
dst_rt = dst_rt._apply_unary_op(ops.ToJSONString())

0 commit comments

Comments
 (0)