diff --git a/.gitignore b/.gitignore
index 894a44c..a0edd63 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,104 +1,5 @@
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-.hypothesis/
-.pytest_cache/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# pyenv
-.python-version
-
-# celery beat schedule file
-celerybeat-schedule
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
+gym_forest/datasets
+models
\ No newline at end of file
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..06fa11d
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+recursive-include gym_forest/datasets *
\ No newline at end of file
diff --git a/README.md b/README.md
index 46444f4..d66284e 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,104 @@
-# gym-forest
\ No newline at end of file
+>Notice: This is research code that will not necessarily be maintained to
+>support further releases of Forest and other Rigetti Software. We welcome bug
+>reports and PRs but make no guarantee about fixes or responses.
+
+# gym-forest
+
+Gym environment for classical synthesis of quantum programs. For more
+information about this, see our paper "Automated Quantum Programming via
+Reinforcement Learning for Combinatorial Optimization".
+
+
+## Installation
+
+In addition to cloning this repository, you will need to download the data files
+associated with the latest GitHub release. They should be unpacked at the
+top level of the `gym-forest` code directory.
+
+```
+git clone https://github.com/rigetti/gym-forest.git
+cd gym-forest
+tar -xjf data.tar.bz2
+pip install .
+```
+
+## Usage
+
+This library provides several OpenAI gym environments available for
+reinforcement learning tasks. For a very simple example, try the following at a
+python repl:
+
+```
+>>> import gym
+>>> import gym_forest
+
+>>> env = gym.make('forest-train-qvm-v0')
+>>> obs = env.reset()
+>>> print(obs)
+...
+```
+
+This environment contains problem instances from the combined MaxCut, MaxQP, and
+QUBO training sets. Resetting the environment selects a random problem instance.
+
+Actions are represented numerically, but encode a discrete set of gates:
+```
+>>> action = env.action_space.sample()
+>>> env.instrs[action]
+<Gate RX(3*pi/2) 9>
+>>> obs, reward, done, info = env.step(action)
+...
+```
+
+For more information, please take a look at `gym_forest/envs/gym_forest.py` and
+comments within.
+
+## Available Environments
+
+We provide sets of randomly generated combinatorial optimization problems, and
+Gym environments for solving these using a quantum resource, either simulated
+(QVM) or real (QPU). The datasets are described in more detail in our paper, but
+we summarize the environments below:
+
+
+| Environment Name           | Resource | Problem Type        | Split      |
+|----------------------------|----------|---------------------|------------|
+| `'forest-train-qvm-v0'`    | QVM      | Maxcut, MaxQP, QUBO | Training   |
+| `'forest-train-qpu-v0'`    | QPU      | Maxcut, MaxQP, QUBO | Training   |
+| `'forest-maxcut-valid-v0'` | QVM      | Maxcut              | Validation |
+| `'forest-maxqp-valid-v0'`  | QVM      | MaxQP               | Validation |
+| `'forest-qubo-valid-v0'`   | QVM      | QUBO                | Validation |
+| `'forest-maxcut-test-v0'`  | QVM      | Maxcut              | Testing    |
+| `'forest-maxqp-test-v0'`   | QVM      | MaxQP               | Testing    |
+| `'forest-qubo-test-v0'`    | QVM      | QUBO                | Testing    |
+
+## Examples
+
+The models in our paper were developed using the [stable
+baselines](https://github.com/hill-a/stable-baselines) library. To use this, you
+may `pip install -r examples/requirements.txt`.
+
+### Example: Single-episode Rollout
+
+See the code in `examples/rollout_episode.py` for an example showing the
+behavior of a "QVM-trained" model on a random maxcut test problem.
+
+Note: This example uses the saved model weights, included in `data.tar.bz2`.
+
+### Example: Training PPO agents.
+
+See the code in `examples/train.py` for an example of how training can be
+performed.
+
+## Citation
+
+If you use this code, please cite:
+
+```
+@article{mckiernan2019automated,
+  title={Automated Quantum Programming via Reinforcement Learning for Combinatorial Optimization},
+  author={McKiernan, K. and Davis, E. and Alam, M. S. and Rigetti, C.},
+  note={TODO},
+  year={2019}
+}
+```
diff --git a/VERSION.txt b/VERSION.txt
new file mode 100644
index 0000000..8acdd82
--- /dev/null
+++ b/VERSION.txt
@@ -0,0 +1 @@
+0.0.1
diff --git a/examples/requirements.txt b/examples/requirements.txt
index f758e68..ae9c793 100644
--- a/examples/requirements.txt
+++ b/examples/requirements.txt
@@ -1 +1,2 @@
+tensorflow
 stable_baselines==2.6.0
diff --git a/examples/rollout_episode.py b/examples/rollout_episode.py
new file mode 100644
index 0000000..eea3bdd
--- /dev/null
+++ b/examples/rollout_episode.py
@@ -0,0 +1,46 @@
+# Copyright 2019, Rigetti Computing
+#
+# This source code is licensed under the Apache License, Version 2.0 found in
+# the LICENSE.txt file in the root directory of this source tree.
+
+
+# This is an example of a single-episode rollout, using the QVM trained agent
+# on the MaxCut test set.
+
+import os
+
+import gym
+import gym_forest
+from stable_baselines import PPO2
+
+MODEL_FILE = os.path.join(os.path.dirname(__file__), '..', 'models', 'qvm.p')
+ENV_NAME = 'forest-maxcut-test-v0'
+MAX_STEPS = 25
+
+env = gym.make(ENV_NAME)
+agent = PPO2.load(MODEL_FILE)
+
+obs = env.reset()
+best_reward = 0
+eps_reward = 0
+for i in range(MAX_STEPS):
+    action, _ = agent.predict(obs)
+    obs, reward, done, info = env.step(action)
+    eps_reward += reward
+    if done:
+        # Early termination returns the remaining episode reward,
+        # assuming that we do just as well on the remaining steps.
+        # Here we recover the corresponding single-step reward.
+        single_step_reward = reward/(MAX_STEPS-i)
+        print('[{}]\t {}\t reward {:.3f}'.format(i, info['instr'], single_step_reward))
+        best_reward = max(best_reward, single_step_reward)
+        if i + 1 < MAX_STEPS:
+            print("Terminated early!")
+        break
+    else:
+        print('[{}]\t {}\t reward {:.3f}'.format(i, info['instr'], reward))
+        best_reward = max(reward, best_reward)
+
+print("\n\nCompleted Episode\n")
+print("Total Episode Reward: {0:.3f}".format(eps_reward))
+print("Max Single-Step Reward: {0:.3f}".format(best_reward))
diff --git a/examples/train.py b/examples/train.py
index 04f70ef..df3c9f4 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -1,3 +1,12 @@
+# Copyright 2019, Rigetti Computing
+#
+# This source code is licensed under the Apache License, Version 2.0 found in
+# the LICENSE.txt file in the root directory of this source tree.
+
+
+# This contains an example of training a `stable_baselines` agent
+# against a gym_forest environment.
+
 import os
 import datetime
 
@@ -21,7 +30,7 @@
 parameters = [env_name, policy.__name__, n_steps, lam, seed, time_stamp]
 
 label = '-'.join([str(i) for i in parameters])
-log_dir = os.path.join('..', 'models', label)
+log_dir = os.path.join(os.path.dirname(__file__), '..', 'models', label)
 if not os.path.exists(log_dir):
     os.makedirs(log_dir)
 print(log_dir)
diff --git a/gym_forest/__init__.py b/gym_forest/__init__.py
index 1ae9fae..5a85ec1 100644
--- a/gym_forest/__init__.py
+++ b/gym_forest/__init__.py
@@ -1,74 +1,60 @@
+import os
 import logging
 
 from gym.envs.registration import register
 
 logger = logging.getLogger(__name__)
 
+def datapath(name):
+    return os.path.join(os.path.dirname(__file__), 'datasets', name)
+
 register(
     id='forest-train-qvm-v0',
     entry_point='gym_forest.envs:ForestDiscreteEnv',
-    kwargs={'data': '../datasets/train_n10.npy',
-            'label': '../datasets/labels_train_n10.npy',
+    kwargs={'data': datapath('all_data_train.npy'),
+            'label': datapath('all_data_labels.npy'),
             'shuffle': True})
 
 register(
     id='forest-train-qpu-v0',
     entry_point='gym_forest.envs:ForestDiscreteEnv',
-    kwargs={'data': '../datasets/train_n10.npy',
-            'label': '../datasets/labels_train_n10.npy',
+    kwargs={'data': datapath('all_data_train.npy'),
+            'label': datapath('all_data_labels.npy'),
             'shuffle': True,
             'qpu': True})
 
 register(
     id='forest-maxcut-valid-v0',
     entry_point='gym_forest.envs:ForestDiscreteEnv',
-    kwargs={'data': '../datasets/maxcut_n10_e5_valid.npy',
+    kwargs={'data': datapath('maxcut_n10_e5_valid.npy'),
             'label': 'maxcut'})
 
 register(
     id='forest-maxqp-valid-v0',
     entry_point='gym_forest.envs:ForestDiscreteEnv',
-    kwargs={'data': '../datasets/maxqp_n10_valid.npy',
+    kwargs={'data': datapath('maxqp_n10_valid.npy'),
             'label': 'maxqp'})
 
 register(
     id='forest-qubo-valid-v0',
     entry_point='gym_forest.envs:ForestDiscreteEnv',
-    kwargs={'data': '../datasets/qubo_n10_valid.npy',
+    kwargs={'data': datapath('qubo_n10_valid.npy'),
             'label': 'qubo'})
 
 register(
     id='forest-maxcut-test-v0',
     entry_point='gym_forest.envs:ForestDiscreteEnv',
-    kwargs={'data': '../datasets/maxcut_n10_e5_test.npy',
-            'label': 'maxcut'})
-
-register(
-    id='forest-maxqp-test-v0',
-    entry_point='gym_forest.envs:ForestDiscreteEnv',
-    kwargs={'data': '../datasets/maxqp_n10_test.npy',
-            'label': 'maxqp'})
-
-register(
-    id='forest-qubo-test-v0',
-    entry_point='gym_forest.envs:ForestDiscreteEnv',
-    kwargs={'data': '../datasets/qubo_n10_test.npy',
-            'label': 'qubo'})
-
-register(
-    id='forest-maxcut-test-qpu-v0',
-    entry_point='gym_forest.envs:ForestDiscreteEnv',
-    kwargs={'data': '../datasets/maxcut_n10_e5_test.npy',
+    kwargs={'data': datapath('maxcut_n10_e5_test.npy'),
             'label': 'maxcut'})
 
 register(
     id='forest-maxqp-test-v0',
     entry_point='gym_forest.envs:ForestDiscreteEnv',
-    kwargs={'data': '../datasets/maxqp_n10_test.npy',
+    kwargs={'data': datapath('maxqp_n10_test.npy'),
             'label': 'maxqp'})
 
 register(
     id='forest-qubo-test-v0',
     entry_point='gym_forest.envs:ForestDiscreteEnv',
-    kwargs={'data': '../datasets/qubo_n10_test.npy',
+    kwargs={'data': datapath('qubo_n10_test.npy'),
             'label': 'qubo'})
diff --git a/gym_forest/envs/gym_forest.py b/gym_forest/envs/gym_forest.py
index 15aab9e..db9d32d 100644
--- a/gym_forest/envs/gym_forest.py
+++ b/gym_forest/envs/gym_forest.py
@@ -1,3 +1,8 @@
+# Copyright 2019, Rigetti Computing
+#
+# This source code is licensed under the Apache License, Version 2.0 found in
+# the LICENSE.txt file in the root directory of this source tree.
+
 import os
 from abc import ABCMeta, abstractmethod
 from itertools import product
@@ -17,10 +22,11 @@
 from pyquil.quilbase import Gate
 from pyquil.unitary_tools import all_bitstrings
 
-NUM_ANGLES = 8
-NUM_SHOTS = 10
-MAX_PROGRAM_LENGTH = 25
+NUM_ANGLES = 8            # discrete actions involve rotation by multiples of 2*pi/NUM_ANGLES
+NUM_SHOTS = 10            # how many measurement shots to use? for a N qubit problem this produces N*NUM_SHOTS bits of data
+MAX_PROGRAM_LENGTH = 25   # limit to the number of actions taken in a given episode
 
+QPU_NAME = 'Aspen-4-16Q-A'
 
 class MinimalPyQVMCompiler(AbstractCompiler):
     def quil_to_native_quil(self, program):
@@ -31,10 +37,20 @@ def native_quil_to_executable(self, nq_program):
 
 
 def bitstring_index(bitstring):
+    "Recover an integer from its bitstring representation."
     return int("".join(map(str, bitstring)), 2)
 
 
 def lift_bitstring_function(n, f):
+    """Lifts a function defined on single bitstrings to arrays of bitstrings.
+
+    Args:
+        n: The number of bits in the bitstring.
+        f: The bitstring function, which produces a float value.
+    Returns:
+        A function which, given a K x n array of 0/1 values, returns the
+        mean of f applied across the K rows.
+    """
     bss = all_bitstrings(n).astype(np.float64)
     vals = np.apply_along_axis(f, 1, bss)
     # normalize to be between 0 and 1
@@ -53,26 +69,38 @@ def _fn(bitstrings):
 
 
 class ProblemSet(metaclass=ABCMeta):
+    """Base class representing an abstract problem set."""
     @property
     @abstractmethod
     def num_problems(self) -> int:
+        "The number of problems in the problem set."
         pass
 
     @property
     @abstractmethod
     def num_variables(self) -> int:
+        "The number of variables in any problem."
         pass
 
     @abstractmethod
     def problem(self, i: int) -> np.ndarray:
+        "An array representing the ith problem."
         pass
 
     @abstractmethod
     def bitstrings_score(self, i: int) -> Callable[[np.ndarray], float]:
+        "The scoring function associated with problem i."
         pass
 
 
 class AllProblems(ProblemSet):
+    """A problem set of combinatorial optimization problems.
+
+    Args:
+        weights: A numpy array of weight matrices. weights[k,i,j] is the
+                 coupling between vertex i and j in the kth problem.
+        labels: A list of string labels, either 'maxcut', 'maxqp', or 'qubo'.
+    """
     def __init__(self, weights, labels):
         assert len(weights.shape) == 3
         assert weights.shape[1] == weights.shape[2]
@@ -88,6 +116,7 @@ def num_variables(self):
         return self._weights.shape[1]
 
     def problem(self, i):
+        # due to the symmetry of these problems, we only need to observe the upper triangular entries
         upper = np.triu_indices(self._weights.shape[1])
         return self._weights[i, :, :][upper]
 
@@ -115,6 +144,25 @@ def quadratic(x):
 
 
 class ForestDiscreteEnv(gym.Env):
+    """The Rigetti Forest environment.
+
+    This implements a Gym environment for gate-based quantum computing with
+    problem-specific rewards on the Rigetti hardware.
+
+    Attributes:
+        observation: A np.array, formed by concatenating observed bitstring values
+                     with a vector containing the problem weights.
+        observation_space: The (continuous) set of possible observations.
+        action_space: The space of discrete actions.
+        instrs: A table mapping action IDs to PyQuil gates.
+
+    Args:
+        data: A path to a numpy dataset.
+        label: Either a path to a dataset of labels, or a single label value.
+        shuffle: A flag indicating whether the data should be randomly shuffled.
+        qpu: A flag indicating whether to run on the qpu given by QPU_NAME.
+
+    """
     def __init__(self, data, label, shuffle=False, qpu=False):
         weights = np.load(data)
         n_graphs = len(weights)
@@ -135,9 +183,9 @@ def __init__(self, data, label, shuffle=False, qpu=False):
 
         qubits = list(range(self.num_qubits))
         angles = np.linspace(0, 2 * np.pi, NUM_ANGLES, endpoint=False)
-        self._instrs = [CNOT(q0, q1) for q0, q1 in product(qubits, qubits) if q0 != q1]
-        self._instrs += [op(theta, q) for q, op, theta in product(qubits, [RX, RY, RZ], angles)]
-        self.action_space = gym.spaces.Discrete(len(self._instrs))
+        self.instrs = [CNOT(q0, q1) for q0, q1 in product(qubits, qubits) if q0 != q1]
+        self.instrs += [op(theta, q) for q, op, theta in product(qubits, [RX, RY, RZ], angles)]
+        self.action_space = gym.spaces.Discrete(len(self.instrs))
 
         obs_len = NUM_SHOTS * self.num_qubits + len(self.pset.problem(0))
         self.observation_space = gym.spaces.Box(np.full(obs_len, -1.0), np.full(obs_len, 1.0), dtype=np.float32)
@@ -146,7 +194,7 @@ def __init__(self, data, label, shuffle=False, qpu=False):
 
         self.qpu = qpu
         if qpu:
-            self._qc = get_qc('Aspen-4-16Q-A')
+            self._qc = get_qc(QPU_NAME)
         else:
             self._qc = QuantumComputer(name='qvm',
                                        qam=PyQVM(n_qubits=self.num_qubits),
@@ -156,13 +204,25 @@ def __init__(self, data, label, shuffle=False, qpu=False):
         self.reset()
 
     def reset(self, problem_id=None):
+        """Reset the state of the environment.
+
+        This clears out whatever program you may have assembled so far, and
+        updates the active problem.
+
+        Args:
+            problem_id: The numeric index of the problem (relative to the problem set).
+                        If None, a random problem will be chosen.
+        """
         if problem_id is None:
             problem_id = np.random.randint(0, self.pset.num_problems())
 
         self.problem_id = problem_id
         self._prob_vec = self.pset.problem(self.problem_id)
+        # the scoring function (for reward computation)
         self._prob_score = self.pset.bitstrings_score(self.problem_id)
 
+        # we put some trivial gates on each relevant qubit, so that we can
+        # always recover the problem variables from the program itself
         self.program = Program([I(q) for q in range(self.num_qubits)])
         self.current_step = 0
         self.running_episode_reward = 0
@@ -172,12 +232,18 @@ def reset(self, problem_id=None):
 
     @property
     def observation(self):
+        """Get the current observed quantum + problem state."""
         return np.concatenate([self.bitstrings.flatten(), self._prob_vec])
 
     def step(self, action):
-        instr = self._instrs[action]
+        """Advance the environment by performing the specified action."""
+        # get the instruction indicated by the action
+        instr = self.instrs[action]
+        # extend the program
         self.program.inst(instr)
+        # run and get some measured bitstrings
         self.bitstrings, info = self._run_program(self.program)
+        # compute the avg score of the bitstrings
         reward = self._prob_score(self.bitstrings)
         self.running_episode_reward += reward
 
@@ -185,6 +251,7 @@ def step(self, action):
         info['reward-nb'] = reward
         self.current_step += 1
 
+        # are we done yet?
         done = False
         if self.current_step >= MAX_PROGRAM_LENGTH:
             done = True
@@ -195,6 +262,9 @@ def step(self, action):
         return self.observation, reward, done, info
 
     def _wrap_program(self, program):
+        # The actions select gates, but a pyquil program needs a bit more:
+        # namely, a declaration of classical memory for readout, and suitable
+        # measurement instructions.
         ro = program.declare('ro', 'BIT', self.num_qubits)
         for q in range(self.num_qubits):
             program.inst(MEASURE(q, ro[q]))
@@ -205,6 +275,7 @@ def _run_program(self, program):
         program = program.copy()
 
         if self.qpu:
+            # time to go through the compiler. whee!
             pragma = Program([Pragma('INITIAL_REWIRING', ['"PARTIAL"']), RESET()])
             program = pragma + program
             program = self._wrap_program(program)
@@ -218,7 +289,7 @@ def _run_program(self, program):
             results = self._qc.run(program)
 
         info = {
-            'gate_count': gate_count
+            'gate_count': gate_count # compiled length for qpu, uncompiled for qvm
         }
         return results, info
 
diff --git a/setup.py b/setup.py
index 8e0a6d0..8f6b078 100644
--- a/setup.py
+++ b/setup.py
@@ -1,3 +1,21 @@
 from setuptools import setup
+try: # for pip >= 10
+    from pip._internal.req import parse_requirements
+except ImportError: # for pip <= 9.0.3
+    from pip.req import parse_requirements
 
-setup(name='gym_forest', version='0.0.1')
+with open("VERSION.txt", "r") as f:
+    VERSION = f.read().strip().strip('"')
+
+setup(
+    name="gym-forest",
+    version=VERSION,
+    author="Rigetti Computing",
+    author_email="info@rigetti.com",
+    description="Gym environments for quantum program synthesis",
+    packages=["gym_forest",
+              "gym_forest.envs"],
+    include_package_data=True,
+    # Dependent packages (distributions)
+    install_requires=[str(req.req) for req in parse_requirements('requirements.txt', session='hack')],
+)