Pyomo · DLWoodruff · Sep 20, 2024 · Oct 1, 2024 · Oct 2, 2024 · Oct 2, 2024
diff --git a/.github/workflows/test_pr_and_main.yml b/.github/workflows/test_pr_and_main.yml
@@ -324,6 +324,7 @@ jobs:
           conda install mpi4py pandas setuptools
           pip install pyomo sphinx sphinx_rtd_theme dill gridx-egret cplex pybind11
           pip install xpress
+          pip install dill
 
       - name: Build pyomo extensions
         run: |

diff --git a/doc/src/index.rst b/doc/src/index.rst
@@ -27,7 +27,7 @@ MPI is used.
    ef.rst
    nompi4py.rst
    secretmenu.rst
-   pickledbundles.rst
+   properbundles.rst
    grad_rho.rst
    w_rho.rst
    admmWrapper.rst

diff --git a/doc/src/pickledbundles.rst b/doc/src/pickledbundles.rst
diff --git a/doc/src/properbundles.rst b/doc/src/properbundles.rst
@@ -0,0 +1,75 @@
+Proper Bundles
+==============
+
+Prior to 2024, bundles were constructed for the purpose of solves, but
+all other processing (e.g., computing W values) was done on individual
+scenarios. We will refer to these as `loose bundles`. This bundling scheme
+is very flexible with respect to the numbers of scenarios in each bundle.
+There are various if-blocks in the mpisppy code to support this type of bundle.
+
+In 2024, support for `proper bundles` was added. After the extensive form
+for a proper bundle is created, the original scenarios are more or less
+forgotten and all processing takes place for the bundle. At the time
+of this writing, these bundles are a little less flexible in that
+the number of scenarios per bundle must divide the number of scenarios
+and randomizing the assignment of scenarios to bundles is left to the
+user (e.g., by using a pseudo-random vector to provide one level
+of indirection for the scenario number in the ``scenario_creator`` function).
+As of the time of this writing, only two-stage problems are easily supported.
+Proper bundles result in faster execution than loose bundles.
+
+See ``mpisppy.generic_cylinders.py`` for an example of their use in
+code and see ``examples.generic_cylinders.bash`` for a few proper
+bundle command lines.  In addition to being created on the fly and
+used with ``--proper-no-files``, they can be written (but not used in
+the same run) with ``--pickle-bundles-dir`` (note that the directory
+specified will be overwritten), and read before use with
+``--unpickle-bundles-dir``.  In all uses of bundles in
+``mpisppy.generic_cylinders.py`` the ``--scenarios-per-bundle`` option
+must be specified (even when reading).
+
+.. Note::
+   When writing bundles in ``mpisppy.generic_cylinders.py``, all
+   ranks are used for forming and writing bundles. Command line
+   options related to anything other than proper bundles are ignored.
+
+.. Note::
+   Reading and writing pickle files only works with proper bundles, not
+   loose bundles.
+
+.. Note::
+   If you do pseudo random number generation on-the-fly during scenario creation,
+   very careful management of random seeds is required if you want to
+   get the same scenarios with proper bundles that you get without them.
+
+Modules
+-------
+
+In addition to command line options specified in ``mpisppy.utils.config.py``
+there are two modules that have most of the support for proper bundles:
+
+  - ``mpisppy.utils.pickle_bundle.py`` has miscellaneous utilities related to pickling and other data processing
+  - ``mpisppy.utils.proper_bundler.py`` has wrappers for cylinder programs
+
+
+Multistage and notes
+--------------------
+
+At the time of this writing, multi-stage proper, pickled bundles are a
+little bit beyond the bleeding edge.  The idea is that bundles are
+formed and then saved as dill pickle files for rapid retrieval. The
+file ``aircond_cylinders.py`` in the aircond example directory
+provides an example.  The latter part of the ``allways.bash`` script
+demonstrates how to run it.
+
+Pickled bundles are clearly useful for algorithm tuning and algorithm
+experimentation. In some, but not all, settings they can also improve
+wall-clock performance for a single optimization run. The pickler
+(e.g., ``bundle_pickler.py`` in the aircond example) does not use a
+solver and can be run once to provide bundles to all cylinders. It can
+often be assigned as many ranks as the total number of CPUs
+available. Reading the bundles from a pickle file is much faster
+than creating them.
+
+The trick is that the bundles must contain entire second stage nodes
+so the resulting bundles represent a two-stage problem.
diff --git a/examples/aircond/aircond_cylinders.py b/examples/aircond/aircond_cylinders.py
@@ -153,7 +153,7 @@ def _parse_args():
                          domain=bool,
                          default=False)    
     # special "proper" bundle arguments
-    pickle_bundle.pickle_bundle_config(cfg)
+    cfg.proper_bundle_config()
 
     cfg.add_to_config("EF_directly",
                          description="Solve the EF directly instead of using cylinders (default False)",

diff --git a/examples/aircond/bundle_pickler.py b/examples/aircond/bundle_pickler.py
@@ -14,7 +14,6 @@
 import numpy as np
 import mpisppy.tests.examples.aircondB as aircondB
 from mpisppy.utils import config
-from mpisppy.utils import pickle_bundle
 
 from mpisppy import MPI
 
@@ -27,7 +26,7 @@
 def _parse_args():
     cfg = config.Config()
     cfg.multistage()
-    pickle_bundle.pickle_bundle_config(cfg)
+    cfg.proper_bundle_config()
     aircondB.inparser_adder(cfg)
     cfg.parse_command_line("bundle_pickler for aircond")
 

diff --git a/examples/generic_cylinders.bash b/examples/generic_cylinders.bash
@@ -3,6 +3,30 @@
 
 SOLVER="cplex"
 
+echo "^^^ use proper bundles without writing ^^^"
+cd sslp
+mpiexec -np 3 python -m mpi4py ../../mpisppy/generic_cylinders.py --module-name sslp --sslp-data-path ./data --instance-name sslp_15_45_10 --proper-no-files --scenarios-per-bundle 5 --default-rho 1 --solver-name ${SOLVER} --max-iterations 5 --lagrangian --xhatshuffle --rel-gap 0.001
+cd ..
+
+echo "^^^ write pickle bundles ^^^"
+cd sslp
+python -m mpi4py ../../mpisppy/generic_cylinders.py --module-name sslp --sslp-data-path ./data --instance-name sslp_15_45_10 --pickle-bundles-dir sslp_pickles --scenarios-per-bundle 5 --default-rho 1
+cd ..
+
+echo "^^^ write pickle bundles faster ^^^"
+# np needs to divide the number of scenarios, btw
+cd sslp
+mpiexec -np 2 python -m mpi4py ../../mpisppy/generic_cylinders.py --module-name sslp --sslp-data-path ./data --instance-name sslp_15_45_10 --pickle-bundles-dir sslp_pickles --scenarios-per-bundle 5 --default-rho 1
+cd ..
+
+echo "^^^ read pickle bundles ^^^"
+cd sslp
+mpiexec -np 3 python -m mpi4py ../../mpisppy/generic_cylinders.py --module-name sslp --sslp-data-path ./data --instance-name sslp_15_45_10 --unpickle-bundles-dir sslp_pickles --scenarios-per-bundle 5 --default-rho 1 --solver-name=${SOLVER} --max-iterations 5 --lagrangian --xhatshuffle --rel-gap 0.001
+cd ..
+
+echo "exit"
+exit
+
 cd farmer
 mpiexec -np 3 python -m mpi4py ../../mpisppy/generic_cylinders.py --module-name farmer --num-scens 3 --solver-name ${SOLVER} --max-iterations 10 --max-solver-threads 4 --default-rho 1 --lagrangian --xhatshuffle --rel-gap 0.01 --solution-base-name farmer_nonants
 

diff --git a/examples/generic_tester.py b/examples/generic_tester.py
@@ -8,6 +8,7 @@
 ###############################################################################
 # This might make a mess in terms of output files....
 # (re)baseline by uncommenting rebaseline_xhat lines
+# The baselines are in subdirectories of examples/test_data
 # NOTE: the asynchronous nature of mip-sppy makes it hard to hit baselines.
 # Run a lot of generic_cylinders examples for regression testing; dlw Aug 2024
 # Not intended to be user-friendly.
@@ -160,7 +161,25 @@ def do_one(dirname, modname, np, argstring, xhat_baseline_dir=None, tol=1e-6):
           "--lagrangian --xhatshuffle --rel-gap 0.001 --branching-factors '3 3' "
           f"--stage2EFsolvern {solver_name} --solver-name={solver_name}")
 #rebaseline_xhat("hydro", "hydro", 3, hydroa, "test_data/hydroa_baseline")
-do_one("hydro", "hydro", 3, hydroa, xhat_baseline_dir = "test_data/hydroa_baseline")
+do_one("hydro", "hydro", 3, hydroa, xhat_baseline_dir="test_data/hydroa_baseline")
+
+# proper bundles
+sslp_pb = ("--sslp-data-path ./data --instance-name sslp_15_45_10 "
+           "--proper-no-files --scenarios-per-bundle 5 --default-rho 1 "
+           f"--solver-name {solver_name} --max-iterations 5 --lagrangian "
+           "--xhatshuffle --rel-gap 0.001")
+#rebaseline_xhat("sslp", "sslp", 3, sslp_pb, "test_data/sslp_pb_baseline")
+do_one("sslp", "sslp", 3, sslp_pb, xhat_baseline_dir="test_data/sslp_pb_baseline")
+
+# write, then read, pickle bundles
+sslp_wr = "--module-name sslp --sslp-data-path ./data --instance-name sslp_15_45_10 --pickle-bundles-dir sslp_pickles --scenarios-per-bundle 5 --default-rho 1"
+do_one("sslp", "sslp", 2, sslp_wr, xhat_baseline_dir=None)
+sslp_rd = ("--sslp-data-path ./data --instance-name sslp_15_45_10 "
+           "--unpickle-bundles-dir sslp_pickles --scenarios-per-bundle 5 "
+           f"--default-rho 1 --solver-name={solver_name} "
+           "--max-iterations 5 --lagrangian --xhatshuffle --rel-gap 0.001")
+#rebaseline_xhat("sslp", "sslp", 3, sslp_rd, "test_data/sslp_rd_baseline")
+do_one("sslp", "sslp", 3, sslp_rd, xhat_baseline_dir="test_data/sslp_rd_baseline")
 
 
 if not nouc:

diff --git a/examples/sslp/sslp.py b/examples/sslp/sslp.py
@@ -57,7 +57,7 @@ def scenario_names_creator(num_scens,start=None):
     # if start!=None, the list starts with the 'start' labeled scenario
     if (start is None) :
         start=1
-    return [f"Scenario{i}" for i in range(start,start+num_scens)]
+    return [f"Scenario{i}" for i in range(start, start+num_scens)]
 
 
 #=========

diff --git a/examples/test_data/sslp_pb_baseline/sslp.csv b/examples/test_data/sslp_pb_baseline/sslp.csv
@@ -0,0 +1,15 @@
+Scenario1.FacilityOpen[1],1.0
+Scenario1.FacilityOpen[2],0.0
+Scenario1.FacilityOpen[3],-0.0
+Scenario1.FacilityOpen[4],1.0
+Scenario1.FacilityOpen[5],0.0
+Scenario1.FacilityOpen[6],0.0
+Scenario1.FacilityOpen[7],0.0
+Scenario1.FacilityOpen[8],1.0
+Scenario1.FacilityOpen[9],-0.0
+Scenario1.FacilityOpen[10],0.0
+Scenario1.FacilityOpen[11],1.0
+Scenario1.FacilityOpen[12],0.0
+Scenario1.FacilityOpen[13],0.0
+Scenario1.FacilityOpen[14],0.0
+Scenario1.FacilityOpen[15],1.0
diff --git a/examples/test_data/sslp_pb_baseline/sslp.npy b/examples/test_data/sslp_pb_baseline/sslp.npy