Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[0.2.dev2] Template for saving data #95

Merged
merged 20 commits into from
Mar 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

## 0.2 (not yet released)

#### 0.2.dev2 (2019-03-04)

- adds template for saving data: `urbansim_templates.data.SaveTable()`
- renames `TableFromDisk()` to `urbansim_templates.data.LoadTable()`

#### 0.2.dev1 (2019-02-27)

- fixes a crash in small MNL simulation
Expand Down
21 changes: 14 additions & 7 deletions docs/source/data-io.rst → docs/source/data-templates.rst
Original file line number Diff line number Diff line change
@@ -1,17 +1,24 @@
Data I/O template APIs
======================
Data template APIs
==================

Data i/o templates let you set up automated model steps for loading data into Orca or saving outputs to disk.
Data templates help you set up model steps for loading data into `Orca <https://udst.github.io/orca>`__ or saving outputs to disk.

These templates follow the same principles as the statistical model steps. For example, to set up a data table, create an instance of the ``TableFromDisk`` class and set some properties: the table name, file type, path, and anything else that's needed.
These templates follow the same principles as the statistical model steps. For example, to set up a data table, create an instance of the ``LoadTable`` class and set some properties: the table name, file type, path, and anything else that's needed.

Registering this object with ModelManager saves it to disk as a YAML file and creates an Orca step with instructions to set up the table. "Running" the object/step registers the table with Orca, but doesn't read the data from disk yet — Orca loads data lazily as it's needed.

Data registration steps are run automatically when you initialize ModelManager.


Table from disk
---------------
Loading data
------------

.. autoclass:: urbansim_templates.io.TableFromDisk
.. autoclass:: urbansim_templates.data.LoadTable
:members:


Saving data
-----------

.. autoclass:: urbansim_templates.data.SaveTable
:members:
4 changes: 2 additions & 2 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ UrbanSim Templates provides building blocks for Orca-based simulation models. It

The library contains templates for common types of model steps, plus a tool called ModelManager that runs as an extension to the `Orca <https://udst.github.io/orca>`__ task orchestrator. ModelManager can register template-based model steps with the orchestrator, save them to disk, and automatically reload them for future sessions.

v0.2.dev1, released February 27, 2019
v0.2.dev2, released March 4, 2019


Contents
Expand All @@ -22,6 +22,6 @@ Contents
getting-started
modelmanager
model-steps
data-io
data-templates
utilities
development
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name='urbansim_templates',
version='0.2.dev1',
version='0.2.dev2',
description='UrbanSim extension for managing model steps',
author='UrbanSim Inc.',
author_email='info@urbansim.com',
Expand Down
80 changes: 43 additions & 37 deletions tests/test_tables.py → tests/test_data_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import orca

from urbansim_templates import modelmanager
from urbansim_templates.io import TableFromDisk
from urbansim_templates.data import LoadTable
from urbansim_templates.utils import validate_template


Expand Down Expand Up @@ -45,18 +45,37 @@ def teardown():

def test_template_validity():
"""
Run the template through the standard validation check.
Run the templates through the standard validation check.

"""
assert validate_template(TableFromDisk)
assert validate_template(LoadTable)


def test_property_persistence(orca_session):
"""
Test persistence of properties across registration, saving, and reloading.

"""
pass
t = LoadTable()
t.table = 'buildings'
t.source_type = 'csv'
t.path = 'data/buildings.csv'
t.csv_index_cols = 'building_id'
t.extra_settings = {'make_data_awesome': True} # unfortunately not a valid setting
t.cache = False
t.cache_scope = 'iteration'
t.copy_col = False
t.name = 'buildings-csv'
t.tags = ['awesome', 'data']
t.autorun = False

d1 = t.to_dict()
modelmanager.register(t)
modelmanager.initialize()
d2 = modelmanager.get_step(t.name).to_dict()

assert d1 == d2
modelmanager.remove_step(t.name)


######################################
Expand All @@ -75,7 +94,7 @@ def test_validation_index_unique(orca_session):
d = {'id': [1,2,3], 'value': [4,4,4]}
orca.add_table('tab', pd.DataFrame(d).set_index('id'))

t = TableFromDisk(name='tab')
t = LoadTable(table='tab')
t.validate()


Expand All @@ -87,7 +106,7 @@ def test_validation_index_not_unique(orca_session):
d = {'id': [1,1,3], 'value': [4,4,4]}
orca.add_table('tab', pd.DataFrame(d).set_index('id'))

t = TableFromDisk(name='tab')
t = LoadTable(table='tab')
try:
t.validate()
except ValueError:
Expand All @@ -104,7 +123,7 @@ def test_validation_multiindex_unique(orca_session):
d = {'id': [1,1,1], 'sub_id': [1,2,3], 'value': [4,4,4]}
orca.add_table('tab', pd.DataFrame(d).set_index(['id', 'sub_id']))

t = TableFromDisk(name='tab')
t = LoadTable(table='tab')
t.validate()


Expand All @@ -117,7 +136,7 @@ def test_validation_multiindex_not_unique(orca_session):
d = {'id': [1,1,1], 'sub_id': [2,2,3], 'value': [4,4,4]}
orca.add_table('tab', pd.DataFrame(d).set_index(['id', 'sub_id']))

t = TableFromDisk(name='tab')
t = LoadTable(table='tab')
try:
t.validate()
except ValueError:
Expand All @@ -134,7 +153,7 @@ def test_validation_unnamed_index(orca_session):
d = {'id': [1,1,3], 'value': [4,4,4]}
orca.add_table('tab', pd.DataFrame(d)) # generates auto index without a name

t = TableFromDisk(name='tab')
t = LoadTable(table='tab')
try:
t.validate()
except ValueError:
Expand All @@ -155,7 +174,7 @@ def test_validation_columns_vs_other_indexes(orca_session):
d = {'building_id': [1,2,3,4], 'value': [4,4,4,4]}
orca.add_table('buildings', pd.DataFrame(d).set_index('building_id'))

t = TableFromDisk(name='households')
t = LoadTable(table='households')
t.validate()


Expand All @@ -171,7 +190,7 @@ def test_validation_index_vs_other_columns(orca_session):
d = {'household_id': [1,2,3], 'building_id': [2,3,5]}
orca.add_table('households', pd.DataFrame(d).set_index('household_id'))

t = TableFromDisk(name='buildings')
t = LoadTable(table='buildings')
t.validate()


Expand All @@ -188,14 +207,11 @@ def test_validation_with_multiindexes(orca_session):
d = {'home_tract': [55,55,55], 'work_tract': [17,18,19], 'dist': [1,1,1]}
orca.add_table('distances', pd.DataFrame(d).set_index(['home_tract','work_tract']))

t = TableFromDisk(name='choice_table')
t = LoadTable(table='choice_table')
t.validate()


# test that parameters make it through a save
# test validation with stand-alone columns

# test loading an h5 file works
# test passing cache settings


Expand All @@ -208,8 +224,8 @@ def test_csv(orca_session, data):
Test loading data from a CSV file.

"""
t = TableFromDisk()
t.name = 'buildings'
t = LoadTable()
t.table = 'buildings'
t.source_type = 'csv'
t.path = 'data/buildings.csv'
t.csv_index_cols = 'building_id'
Expand All @@ -223,16 +239,16 @@ def test_csv(orca_session, data):
modelmanager.initialize()
assert 'buildings' in orca.list_tables()

modelmanager.remove_step('buildings')
modelmanager.remove_step(t.name)


def test_hdf(orca_session, data):
"""
Test loading data from an HDF file.

"""
t = TableFromDisk()
t.name = 'buildings'
t = LoadTable()
t.table = 'buildings'
t.source_type = 'hdf'
t.path = 'data/buildings.hdf'

Expand All @@ -245,16 +261,16 @@ def test_hdf(orca_session, data):
modelmanager.initialize()
assert 'buildings' in orca.list_tables()

modelmanager.remove_step('buildings')
modelmanager.remove_step(t.name)


def test_extra_settings(orca_session, data):
"""
Test loading data with extra settings, e.g. for compressed files.

"""
t = TableFromDisk()
t.name = 'buildings'
t = LoadTable()
t.table = 'buildings'
t.source_type = 'csv'
t.path = 'data/buildings.csv.gz'
t.csv_index_cols = 'building_id'
Expand All @@ -269,26 +285,16 @@ def test_extra_settings(orca_session, data):
modelmanager.initialize()
assert 'buildings' in orca.list_tables()

modelmanager.remove_step('buildings')


def test_windows_paths(orca_session, data):
"""
Test in Windows that a Windows-style path is properly normalized.

TO DO - implement

"""
pass
modelmanager.remove_step(t.name)


def test_without_autorun(orca_session, data):
"""
Confirm that disabling autorun works.

"""
t = TableFromDisk()
t.name = 'buildings'
t = LoadTable()
t.table = 'buildings'
t.source_type = 'csv'
t.path = 'data/buildings.csv'
t.csv_index_cols = 'building_id'
Expand All @@ -297,7 +303,7 @@ def test_without_autorun(orca_session, data):
modelmanager.register(t)
assert 'buildings' not in orca.list_tables()

modelmanager.remove_step('buildings')
modelmanager.remove_step(t.name)



Loading