NREL · rHorsey · Sep 11, 2024 · Jan 22, 2024 · Jan 23, 2024 · Jan 23, 2024
diff --git a/national/housing_characteristics/options_lookup.tsv b/national/housing_characteristics/options_lookup.tsv
diff --git a/postprocessing/compare_comstock_to_cbecs.py.template b/postprocessing/compare_comstock_to_cbecs.py.template
@@ -12,12 +12,12 @@ def main():
     # ComStock run
     comstock = cspp.ComStock(
         s3_base_dir='eulp/euss_com',  # If run not on S3, download results_up**.parquet manually
-        comstock_run_name='baseline_vav_mdp_adjust2',  # Name of the run on S3
-        comstock_run_version='baseline_vav_mdp_adjust2',  # Use whatever you want to see in plot and folder names
+        comstock_run_name='cycle_4_sampling_test_rand_985932_20240321',  # Name of the run on S3
+        comstock_run_version='new_sampling_test',  # Use whatever you want to see in plot and folder names
         comstock_year=2018,  # Typically don't change this
-        athena_table_name='baseline_vav_mdp_adjust2',  # Typically same as comstock_run_name or None
+        athena_table_name='rand_985932_20240321',  # Typically same as comstock_run_name or None
         truth_data_version='v01',  # Typically don't change this
-        buildstock_csv_name='buildstock.csv', # Download buildstock.csv manually
+        buildstock_csv_name='rand_985932_sampling_buildstock.csv', # Download buildstock.csv manually
         acceptable_failure_percentage=0.9,  # Can increase this when testing and high failure are OK
         drop_failed_runs=True,  # False if you want to evaluate which runs failed in raw output data
         color_hex='#0072B2',  # Color used to represent this run in plots
@@ -26,17 +26,32 @@ def main():
         include_upgrades=False,  # False if not looking at upgrades
         upgrade_ids_to_skip=[]  # Use [1, 3] etc. to exclude certain upgrades
     )
+
+    # Stock Estimation for Apportionment:
+    stock_estimate = cspp.Apportion(
+        stock_estimation_version='2024R2',  # Only updated when a new stock estimate is published
+        truth_data_version='v01'  # Typically don't change this
+    )
 
+    # Apportion ComStock weights using the stock estimate, then build national, state, and county aggregations
+    comstock.add_weights_aportioned_by_stock_estimate(apportionment=stock_estimate)
+    comstock.create_national_aggregation()
+    comstock.create_geospatially_resolved_aggregations(comstock.STATE_ID, pretty_geo_col_name='state_id')
+    comstock.create_geospatially_resolved_aggregations(comstock.COUNTY_ID, pretty_geo_col_name='county_id')
+
     # CBECS
     cbecs = cspp.CBECS(
         cbecs_year=2018,  # 2012 and 2018 currently available
         truth_data_version='v01',  # Typically don't change this
         color_hex='#009E73',  # Color used to represent CBECS in plots
         reload_from_csv=False  # True if CSV already made and want faster reload times
         )
+
+    # TODO: Update the code below this point, and ensure CBECS weights can still be applied on top of previously applied weights.
 
     # Scale ComStock run to CBECS 2018 AND remove non-ComStock buildings from CBECS
     comstock.add_national_scaling_weights(cbecs, remove_non_comstock_bldg_types_from_cbecs=True)
+    comstock.calculate_weighted_columnal_values()
     comstock.export_to_csv_wide()
 
     # Make a comparison by passing in a list of CBECs and ComStock runs to compare

diff --git a/postprocessing/comstockpostproc/__init__.py b/postprocessing/comstockpostproc/__init__.py
@@ -4,6 +4,7 @@
 from .cbecs import CBECS
 from .eia import EIA
 from .ami import AMI
+from .comstock_apportionment import Apportion
 from .comstock_to_cbecs_comparison import ComStockToCBECSComparison
 from .comstock_measure_comparison import ComStockMeasureComparison
 from .comstock_to_eia_comparison import ComStockToEIAComparison

diff --git a/postprocessing/comstockpostproc/ami.py b/postprocessing/comstockpostproc/ami.py
@@ -140,6 +140,8 @@ def __init__(self, truth_data_version, color_hex=NamingMixin.COLOR_AMI, reload_f
             self.ami_timeseries_data = pd.read_csv(file_path, low_memory=False, index_col='timestamp', parse_dates=True)
         else:
             self.calculate_ami_aggregates()
+
+        assert isinstance(self.ami_timeseries_data, pd.DataFrame)
 
     def download_truth_data(self):
         # AMI data

diff --git a/postprocessing/comstockpostproc/cbecs.py b/postprocessing/comstockpostproc/cbecs.py
@@ -7,6 +7,7 @@
 import logging
 import numpy as np
 import pandas as pd
+import polars as pl
 
 from comstockpostproc.naming_mixin import NamingMixin
 from comstockpostproc.units_mixin import UnitsMixin
@@ -76,6 +77,12 @@ def __init__(self, cbecs_year, truth_data_version, color_hex=NamingMixin.COLOR_C
         for c in self.data.columns:
             logger.debug(c)
 
+        assert isinstance(self.data, pd.DataFrame)
+        logging.info(f'Created {self.dataset_name} with {len(self.data)} rows')
+        self.data = self.data.astype(str)
+        self.data = pl.from_pandas(self.data).lazy()
+        assert isinstance(self.data, pl.LazyFrame)
+
     def download_data(self):
         # CBECS microdata
         file_name = f'CBECS_{self.year}_microdata.csv'