Skip to content

Commit 14b9b94

Browse files
authored
Add write support to json from pd.DataFrame (AmpX-AI#31)
add json write support
1 parent 55b3ebe commit 14b9b94

File tree

3 files changed

+34
-0
lines changed

3 files changed

+34
-0
lines changed

src/fsql/api.py

+3
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,9 @@ def write_object(
147147
elif format == "csv":
148148
with fs.open(url_suff, "wb") as fd:
149149
data.to_csv(fd)
150+
elif format == "json":
151+
with fs.open(url_suff, "wb") as fd:
152+
data.to_json(fd)
150153
else:
151154
raise ValueError(f"unsupported format for dataframe writing: {format}")
152155
elif isinstance(data, io.StringIO) or isinstance(data, io.BytesIO):

tests/conftest.py

+5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Sets a mock/fake of the S3 filesystem for any `fsql`-based usage."""
2+
import json
23
import os
34

45
import fsspec
@@ -14,6 +15,10 @@ def put_s3_file(self, data, url):
1415
with self.s3fs.open(url, "wb") as fd:
1516
fd.write(data)
1617

18+
def read_json_file(self, url):
    """Return the parsed JSON content of the file at `url` on the mocked S3 filesystem."""
    with self.s3fs.open(url, "r") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
21+
1722

1823
@pytest.fixture
1924
def helper():

tests/test_write_object.py

+26
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import io
2+
import json
23

34
import pandas as pd
45
import pytest
@@ -87,3 +88,28 @@ def test_write_vanilla_bytes(tmpdir):
8788
with open(path, "rb") as f:
8889
extracted = f.read()
8990
assert extracted == data
91+
92+
93+
def test_write_json_s3(helper):
    """Writes a dataframe as json to the mocked S3, tests that read works."""
    bucket = "test-bouquet"
    fs = helper.s3fs
    fs.mkdir(bucket)

    # Build the URL from `bucket` so the created bucket and the target cannot drift apart.
    url = f"s3://{bucket}/my_df.json"
    # Named `frame` rather than `input` to avoid shadowing the builtin.
    frame = pd.DataFrame({"k1": [1, 2], "k2": [3, 4]}, index=["one", "two"])
    write_object(url, frame, format="json")
    output = helper.read_json_file(url)
    # The expected value is nested as column -> index label -> value.
    expected_output = {"k1": {"one": 1, "two": 2}, "k2": {"one": 3, "two": 4}}
    assert output == expected_output
104+
105+
106+
def test_write_json(tmpdir):
    """Writes a dataframe as json to a local file, tests that read works."""
    # Named `frame` rather than `input` to avoid shadowing the builtin.
    frame = pd.DataFrame({"k1": [1, 2], "k2": [3, 4]}, index=["one", "two"])
    path_base = tmpdir.join("my_file.json")
    url = f"file://{path_base}"
    write_object(url, frame, format="json")
    # Read back with the plain stdlib, bypassing the code under test.
    with open(path_base, "r") as f:
        output = json.load(f)
    # The expected value is nested as column -> index label -> value.
    expected_output = {"k1": {"one": 1, "two": 2}, "k2": {"one": 3, "two": 4}}
    assert output == expected_output

0 commit comments

Comments
 (0)