class Loader(Generic[IntoDataFrameT, IntoFrameT]):
    """
-    Load example datasets **remotely** from `vega-datasets`_, with caching.
+    Load example datasets *remotely* from `vega-datasets`_, with caching.

-    A new ``Loader`` must be initialized by specifying a backend:
+    A new ``Loader`` must be initialized by specifying a backend::

        from altair.datasets import Loader

        load = Loader.from_backend("polars")
-    >>> load  # doctest: +SKIP
+        load
        Loader[polars]

    .. _vega-datasets:
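As a runnable counterpart to the literal block above, here is a minimal sketch of the documented workflow (assuming ``altair`` is installed together with the ``polars`` extra)::

    from altair.datasets import Loader

    load = Loader.from_backend("polars")
    print(load)  # Loader[polars]

    # Fetched from the vega-datasets CDN on first use, cached afterwards.
    cars = load("cars")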
@@ -81,42 +81,35 @@ def from_backend(cls, backend_name: _Backend = "polars", /) -> Loader[Any, Any]:
        .. warning::
            Most datasets use a `JSON format not supported`_ by ``pyarrow``

-        .. _polars defaults:
-            https://docs.pola.rs/api/python/stable/reference/io.html
-        .. _pandas defaults:
-            https://pandas.pydata.org/docs/reference/io.html
-        .. _JSON format not supported:
-            https://arrow.apache.org/docs/python/json.html#reading-json-files
-
        Examples
        --------
-        Using ``polars``:
+        Using ``polars``::

            from altair.datasets import Loader

            load = Loader.from_backend("polars")
            cars = load("cars")

-        >>> type(cars)  # doctest: +SKIP
+            type(cars)
            polars.dataframe.frame.DataFrame

-        Using ``pandas``:
+        Using ``pandas``::

            load = Loader.from_backend("pandas")
            cars = load("cars")

-        >>> type(cars)  # doctest: +SKIP
+            type(cars)
            pandas.core.frame.DataFrame

-        Using ``pandas``, backed by ``pyarrow`` dtypes:
+        Using ``pandas``, backed by ``pyarrow`` dtypes::

            load = Loader.from_backend("pandas[pyarrow]")
            cars = load("cars")

-        >>> type(cars)  # doctest: +SKIP
+            type(cars)
            pandas.core.frame.DataFrame

-        >>> cars.dtypes  # doctest: +SKIP
+            cars.dtypes
            Name                string[pyarrow]
            Miles_per_Gallon    double[pyarrow]
            Cylinders            int64[pyarrow]
@@ -127,6 +120,13 @@ def from_backend(cls, backend_name: _Backend = "polars", /) -> Loader[Any, Any]:
            Year           timestamp[ns][pyarrow]
            Origin              string[pyarrow]
            dtype: object
+
+        .. _polars defaults:
+            https://docs.pola.rs/api/python/stable/reference/io.html
+        .. _pandas defaults:
+            https://pandas.pydata.org/docs/reference/io.html
+        .. _JSON format not supported:
+            https://arrow.apache.org/docs/python/json.html#reading-json-files
        """
        obj = Loader.__new__(Loader)
        obj._reader = backend(backend_name)
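The three documented backend names return different native frame types for the same call; a short sketch, assuming the ``polars``, ``pandas``, and ``pyarrow`` extras are all installed::

    from altair.datasets import Loader

    # Each backend name documented above yields a different frame type.
    for backend_name in ("polars", "pandas", "pandas[pyarrow]"):
        cars = Loader.from_backend(backend_name)("cars")
        print(backend_name, type(cars))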
@@ -154,24 +154,19 @@ def __call__(
        **kwds
            Arguments passed to the underlying read function.

-        .. _Path.stem:
-            https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.stem
-        .. _Path.suffix:
-            https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.suffix
-
        Examples
        --------
-        Using ``polars``:
+        Using ``polars``::

            from altair.datasets import Loader

            load = Loader.from_backend("polars")
            source = load("iowa-electricity")

-        >>> source.columns  # doctest: +SKIP
+            source.columns
            ['year', 'source', 'net_generation']

-        >>> source  # doctest: +SKIP
+            source
            shape: (51, 3)
            ┌────────────┬──────────────┬────────────────┐
            │ year       ┆ source       ┆ net_generation │
@@ -191,15 +186,15 @@ def __call__(
            │ 2017-01-01 ┆ Renewables   ┆ 21933          │
            └────────────┴──────────────┴────────────────┘

-        Using ``pandas``:
+        Using ``pandas``::

            load = Loader.from_backend("pandas")
            source = load("iowa-electricity")

-        >>> source.columns  # doctest: +SKIP
+            source.columns
            Index(['year', 'source', 'net_generation'], dtype='object')

-        >>> source  # doctest: +SKIP
+            source
                     year        source  net_generation
            0  2001-01-01  Fossil Fuels           35361
            1  2002-01-01  Fossil Fuels           35991
@@ -215,15 +210,15 @@ def __call__(
            [51 rows x 3 columns]

-        Using ``pyarrow``:
+        Using ``pyarrow``::

            load = Loader.from_backend("pyarrow")
            source = load("iowa-electricity")

-        >>> source.column_names  # doctest: +SKIP
+            source.column_names
            ['year', 'source', 'net_generation']

-        >>> source  # doctest: +SKIP
+            source
            pyarrow.Table
            year: date32[day]
            source: string
@@ -232,6 +227,11 @@ def __call__(
            year: [[2001-01-01,2002-01-01,2003-01-01,2004-01-01,2005-01-01,...,2013-01-01,2014-01-01,2015-01-01,2016-01-01,2017-01-01]]
            source: [["Fossil Fuels","Fossil Fuels","Fossil Fuels","Fossil Fuels","Fossil Fuels",...,"Renewables","Renewables","Renewables","Renewables","Renewables"]]
            net_generation: [[35361,35991,36234,36205,36883,...,16476,17452,19091,21241,21933]]
+
+        .. _Path.stem:
+            https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.stem
+        .. _Path.suffix:
+            https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.suffix
        """
        return self._reader.dataset(name, suffix, **kwds)
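Whatever the backend, ``load(...)`` resolves the same columns; only the accessor differs (``columns`` on polars/pandas frames, ``column_names`` on a ``pyarrow.Table``). A small sketch, assuming the ``polars`` and ``pyarrow`` extras are installed::

    from altair.datasets import Loader

    pl_source = Loader.from_backend("polars")("iowa-electricity")
    pa_source = Loader.from_backend("pyarrow")("iowa-electricity")

    # Same dataset, same columns, different container types.
    assert pl_source.columns == pa_source.column_names == ["year", "source", "net_generation"]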
@@ -261,16 +261,16 @@ def url(
        Examples
        --------
-        The returned url will always point to an accessible dataset:
+        The returned url will always point to an accessible dataset::

            import altair as alt
            from altair.datasets import Loader

            load = Loader.from_backend("polars")
-        >>> load.url("cars")  # doctest: +SKIP
-        'https://cdn.jsdelivr.net/npm/vega-datasets@v2.11.0/data/cars.json'
+            load.url("cars")
+            "https://cdn.jsdelivr.net/npm/vega-datasets@v2.11.0/data/cars.json"

-        We can pass the result directly to a chart:
+        We can pass the result directly to a chart::

            url = load.url("cars")
            alt.Chart(url).mark_point().encode(x="Horsepower:Q", y="Miles_per_Gallon:Q")
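Because the chart is given a URL rather than a materialized frame, the exported spec stays small and simply points at the CDN. A sketch of how one might check that, assuming the usual Vega-Lite spec layout produced by ``Chart.to_dict()``::

    import altair as alt
    from altair.datasets import Loader

    load = Loader.from_backend("polars")
    url = load.url("cars")

    chart = alt.Chart(url).mark_point().encode(x="Horsepower:Q", y="Miles_per_Gallon:Q")
    # The data entry of the spec is the remote URL, not inlined rows.
    assert chart.to_dict()["data"]["url"] == url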
@@ -282,19 +282,19 @@ def cache(self) -> DatasetCache[IntoDataFrameT, IntoFrameT]:
        """
        Caching of remote dataset requests.

-        Configure cache path:
+        Configure cache path::

            self.cache.path = "..."

-        Download the latest datasets *ahead-of-time*:
+        Download the latest datasets *ahead-of-time*::

            self.cache.download_all()

-        Remove all downloaded datasets:
+        Remove all downloaded datasets::

            self.cache.clear()

-        Disable caching:
+        Disable caching::

            self.cache.path = None
        """