diff --git a/analyses/new-feature-template.qmd b/analyses/new-feature-template.qmd index 98e24d6d3..d675dd753 100644 --- a/analyses/new-feature-template.qmd +++ b/analyses/new-feature-template.qmd @@ -15,8 +15,8 @@ format: fig-align: center fontsize: 12pt params: - run_id: "2025-02-11-charming-eric" - run_id_year: "2025" + run_id: "2026-02-03-compassionate-bowen" + run_id_year: "2026" comparison_run_id: "2025-01-10-serene-boni" comparison_run_id_year: "2025" added_feature: "time_sale_roll_mean_nbhd_t0_w3" diff --git a/dvc.lock b/dvc.lock index 52faab73d..246c92a51 100755 --- a/dvc.lock +++ b/dvc.lock @@ -5,25 +5,24 @@ stages: deps: - path: pipeline/00-ingest.R hash: md5 - md5: c4ceb95f9f3886c99bb7517d65208957 - size: 22881 + md5: ccf3c4f2bf27ebfc99bcc0c67cfed6e2 + size: 23623 params: params.yaml: assessment: - year: '2025' - date: '2025-01-01' - triad: north + year: '2026' + date: '2026-01-01' + triad: south group: residential - data_year: '2024' - working_year: '2025' + data_year: '2025' + working_year: '2026' input: - min_sale_year: '2016' - max_sale_year: '2024' + min_sale_year: '2017' + max_sale_year: '2025' n_years_prior: 4 complex: match_exact: - meta_township_code - - meta_class - char_bsmt - char_gar1_size - char_attic_fnsh @@ -38,35 +37,35 @@ stages: outs: - path: input/assessment_data.parquet hash: md5 - md5: 5bddc7898ab32b5d082fd7acc9cb39c3 - size: 420008988 + md5: 815bca22e0844c82e910d166015e5452 + size: 418572081 - path: input/char_data.parquet hash: md5 - md5: c82b16c142a14cd3b3e1f9c57012db83 - size: 842953864 + md5: 8fb6173490b9e969ef9c0f2aa9a48313 + size: 843081253 - path: input/complex_id_data.parquet hash: md5 - md5: 1a6b5a11c589b520343c7badb111644b - size: 709841 + md5: 425076a3e7179c93baa5f511ef71bf4b + size: 706109 - path: input/hie_data.parquet hash: md5 - md5: 33b8b10dd555d63498010dc05ca3f00e - size: 1924357 + md5: 79d62ef4d1a9b5d945a5aafaed54e263 + size: 1922138 - path: input/land_nbhd_rate_data.parquet hash: md5 - md5: 5fe80edfabdfac91efe888a25ee4051c - size: 6019 + md5: b4cfaf3d4a35c250990752024a88f3bb + size: 5709 - path: input/training_data.parquet hash: md5 - md5: 4146491d9160905569e67ade97610c69 - size: 205524817 + md5: 9f59471e50de4f1716274f600b566db9 + size: 197716407 train: cmd: Rscript pipeline/01-train.R deps: - path: input/training_data.parquet hash: md5 - md5: 3156fd30394ae3fb9eda7e0d0176ab2f - size: 208501951 + md5: 9f59471e50de4f1716274f600b566db9 + size: 197716407 - path: pipeline/01-train.R hash: md5 md5: 46115d48cf066d35b0db14dc13a8d9b3 @@ -211,7 +210,6 @@ stages: - prox_nearest_road_highway_dist_ft - prox_nearest_road_arterial_dist_ft - prox_nearest_road_collector_dist_ft - - prox_nearest_road_highway_daily_traffic - prox_nearest_road_arterial_daily_traffic - prox_nearest_road_collector_daily_traffic - prox_nearest_new_construction_dist_ft @@ -233,8 +231,6 @@ stages: - acs5_median_household_renter_occupied_gross_rent - acs5_percent_household_owner_occupied - other_tax_bill_rate - - other_school_district_elementary_avg_rating - - other_school_district_secondary_avg_rating - time_sale_year - time_sale_day - time_sale_quarter_of_year @@ -279,13 +275,13 @@ stages: - meta_class - meta_card_num - meta_sale_document_num - model.seed: 2024 + model.seed: 2026 model.verbose: -1 ratio_study: - far_year: '2021' + far_year: '2023' far_stage: board far_column: meta_2yr_pri_board_tot - near_year: '2023' + near_year: '2025' near_stage: certified near_column: meta_certified_tot min_n_sales: 30 @@ -307,72 +303,72 @@ stages: outs: - path: output/intermediate/timing/model_timing_train.parquet hash: md5 - md5: 718355a0a97646ada3839b423fa6f505 + md5: f3b98cc316e8de429f36b1e2dac2e358 size: 2494 - path: output/parameter_final/model_parameter_final.parquet hash: md5 - md5: 42d5e8030cef68b122116608d206756a + md5: fb992eb468be163cefb9b7e81fc2daa3 size: 6403 - path: output/parameter_range/model_parameter_range.parquet hash: md5 - md5: a47965c8cbafb84368f2a21a047bc7f2 + md5: b378f8ae73167478032391c6cdd3bbad size: 501 - path: output/parameter_search/model_parameter_search.parquet hash: md5 - md5: a47965c8cbafb84368f2a21a047bc7f2 + md5: b378f8ae73167478032391c6cdd3bbad size: 501 - path: output/test_card/model_test_card.parquet hash: md5 - md5: 2c849fe0de354d762fb6bd7cc9527b3c - size: 2252194 + md5: 3dcfbe00fdf1b9b43a8efd61eba9cd38 + size: 2051281 - path: output/workflow/fit/model_workflow_fit.zip hash: md5 - md5: 4f474f2031e275bc6af6fed5ac84cf11 - size: 11819090 + md5: b69d2dcdefe58704b06b84907b32f130 + size: 11729461 - path: output/workflow/recipe/model_workflow_recipe.rds hash: md5 - md5: 94a53f083b777c19b0259fe20b02dc47 - size: 1981143 + md5: 2942309984679ea67a89a3d127fe78cd + size: 43615 assess: cmd: Rscript pipeline/02-assess.R deps: - path: input/assessment_data.parquet hash: md5 - md5: cecaf4aee89d2269bd059f536e611101 - size: 425453415 + md5: 815bca22e0844c82e910d166015e5452 + size: 418572081 - path: input/complex_id_data.parquet hash: md5 - md5: 835be789fc9ef09f3bfa1d5c8465f6e6 - size: 704175 + md5: 425076a3e7179c93baa5f511ef71bf4b + size: 706109 - path: input/land_nbhd_rate_data.parquet hash: md5 - md5: f3ec9627322bd271bf2957b7388aaa34 - size: 3873 + md5: b4cfaf3d4a35c250990752024a88f3bb + size: 5709 - path: input/training_data.parquet hash: md5 - md5: 3156fd30394ae3fb9eda7e0d0176ab2f - size: 208501951 + md5: 9f59471e50de4f1716274f600b566db9 + size: 197716407 - path: output/workflow/fit/model_workflow_fit.zip hash: md5 - md5: 4f474f2031e275bc6af6fed5ac84cf11 - size: 11819090 + md5: b69d2dcdefe58704b06b84907b32f130 + size: 11729461 - path: output/workflow/recipe/model_workflow_recipe.rds hash: md5 - md5: 94a53f083b777c19b0259fe20b02dc47 - size: 1981143 + md5: 2942309984679ea67a89a3d127fe78cd + size: 43615 - path: pipeline/02-assess.R hash: md5 - md5: 8ad109ba91182b164db83c67a7f097e9 - size: 22671 + md5: dfd02c27b923634010a7723ba679efb3 + size: 23253 params: params.yaml: assessment: - year: '2024' - date: '2024-01-01' - triad: north + year: '2026' + date: '2026-01-01' + triad: south group: residential - data_year: '2023' - working_year: '2025' + data_year: '2025' + working_year: '2026' model.predictor.all: - meta_township_code - meta_nbhd_code @@ -434,7 +430,6 @@ stages: - prox_nearest_road_highway_dist_ft - prox_nearest_road_arterial_dist_ft - prox_nearest_road_collector_dist_ft - - prox_nearest_road_highway_daily_traffic - prox_nearest_road_arterial_daily_traffic - prox_nearest_road_collector_daily_traffic - prox_nearest_new_construction_dist_ft @@ -456,8 +451,6 @@ stages: - acs5_median_household_renter_occupied_gross_rent - acs5_percent_household_owner_occupied - other_tax_bill_rate - - other_school_district_elementary_avg_rating - - other_school_district_secondary_avg_rating - time_sale_year - time_sale_day - time_sale_quarter_of_year @@ -473,7 +466,6 @@ stages: - shp_parcel_mrr_side_ratio - shp_parcel_num_vertices pv: - multicard_yoy_cap: 2.2 land_pct_of_total_cap: 0.5 round_break: - 1000 @@ -486,10 +478,10 @@ stages: - 10000 round_type: floor ratio_study: - far_year: '2021' + far_year: '2023' far_stage: board far_column: meta_2yr_pri_board_tot - near_year: '2023' + near_year: '2025' near_stage: certified near_column: meta_certified_tot min_n_sales: 30 @@ -510,27 +502,27 @@ stages: outs: - path: output/assessment_card/model_assessment_card.parquet hash: md5 - md5: 92b7958bedd436e99cc3c891ad08acc1 - size: 279676461 + md5: a2e78733c6bf3745408cbfdddaf79de3 + size: 279244518 - path: output/assessment_pin/model_assessment_pin.parquet hash: md5 - md5: 45652e2a5f56ba388adee4fbbc0ed097 - size: 113678976 + md5: c6231bdeb6ecd182cd83244589a5b6f6 + size: 111516900 - path: output/intermediate/timing/model_timing_assess.parquet hash: md5 - md5: c24ab596c68f5d85bc9840693913592b + md5: a196a4a4e56f22519ec3550a242f0e26 size: 2494 evaluate: cmd: Rscript pipeline/03-evaluate.R deps: - path: output/assessment_pin/model_assessment_pin.parquet hash: md5 - md5: 45652e2a5f56ba388adee4fbbc0ed097 - size: 113678976 + md5: c6231bdeb6ecd182cd83244589a5b6f6 + size: 111516900 - path: output/test_card/model_test_card.parquet hash: md5 - md5: 2c849fe0de354d762fb6bd7cc9527b3c - size: 2252194 + md5: 3dcfbe00fdf1b9b43a8efd61eba9cd38 + size: 2051281 - path: pipeline/03-evaluate.R hash: md5 md5: b68f8032a61613e5b2d25829f955f056 @@ -538,17 +530,17 @@ stages: params: params.yaml: assessment: - year: '2024' - date: '2024-01-01' - triad: north + year: '2026' + date: '2026-01-01' + triad: south group: residential - data_year: '2023' - working_year: '2025' + data_year: '2025' + working_year: '2026' ratio_study: - far_year: '2021' + far_year: '2023' far_stage: board far_column: meta_2yr_pri_board_tot - near_year: '2023' + near_year: '2025' near_stage: certified near_column: meta_certified_tot min_n_sales: 30 @@ -569,51 +561,51 @@ stages: outs: - path: output/intermediate/timing/model_timing_evaluate.parquet hash: md5 - md5: 9b6970f2d37eb392f0749aca46e3cdab + md5: f0f3238577d9bdf835f210871ceffc2f size: 2514 - path: output/performance/model_performance_assessment.parquet hash: md5 - md5: b97ebb3ee8b84c449808eaa0461ee328 - size: 2757682 + md5: bfb62b767573d17e288b7cc38cd68132 + size: 3224032 - path: output/performance/model_performance_test.parquet hash: md5 - md5: 74e2066197cb5b081a0ce1824053d6ec - size: 8435404 + md5: 6fd4726e9b4d5686aeb5aaa559650788 + size: 8240751 - path: output/performance_quantile/model_performance_quantile_assessment.parquet hash: md5 - md5: 1b1b40251eaaff638da9a92c9a6d524c - size: 989964 + md5: 252248672a31b8a6b3225d85aa9084e4 + size: 996286 - path: output/performance_quantile/model_performance_quantile_test.parquet hash: md5 - md5: 700d46e105a18f8f50c82fcc9835289a - size: 4886365 + md5: 2d9b186aaa4acb6015cd1f3c5577e4c2 + size: 4867672 interpret: cmd: Rscript pipeline/04-interpret.R deps: - path: input/assessment_data.parquet hash: md5 - md5: cecaf4aee89d2269bd059f536e611101 - size: 425453415 + md5: 815bca22e0844c82e910d166015e5452 + size: 418572081 - path: input/training_data.parquet hash: md5 - md5: 3156fd30394ae3fb9eda7e0d0176ab2f - size: 208501951 + md5: 9f59471e50de4f1716274f600b566db9 + size: 197716407 - path: output/assessment_card/model_assessment_card.parquet hash: md5 - md5: 92b7958bedd436e99cc3c891ad08acc1 - size: 279676461 + md5: a2e78733c6bf3745408cbfdddaf79de3 + size: 279244518 - path: output/workflow/fit/model_workflow_fit.zip hash: md5 - md5: 4f474f2031e275bc6af6fed5ac84cf11 - size: 11819090 + md5: b69d2dcdefe58704b06b84907b32f130 + size: 11729461 - path: output/workflow/recipe/model_workflow_recipe.rds hash: md5 - md5: 94a53f083b777c19b0259fe20b02dc47 - size: 1981143 + md5: 2942309984679ea67a89a3d127fe78cd + size: 43615 - path: pipeline/04-interpret.R hash: md5 - md5: b56862f667b87c72f851ecc836ea4ea5 - size: 9726 + md5: c18082f2493a485d5ccc73bdb90ccbc0 + size: 13562 params: params.yaml: model.predictor.all: @@ -677,7 +669,6 @@ stages: - prox_nearest_road_highway_dist_ft - prox_nearest_road_arterial_dist_ft - prox_nearest_road_collector_dist_ft - - prox_nearest_road_highway_daily_traffic - prox_nearest_road_arterial_daily_traffic - prox_nearest_road_collector_daily_traffic - prox_nearest_new_construction_dist_ft @@ -699,8 +690,6 @@ stages: - acs5_median_household_renter_occupied_gross_rent - acs5_percent_household_owner_occupied - other_tax_bill_rate - - other_school_district_elementary_avg_rating - - other_school_district_secondary_avg_rating - time_sale_year - time_sale_day - time_sale_quarter_of_year @@ -716,47 +705,47 @@ stages: - shp_parcel_mrr_side_ratio - shp_parcel_num_vertices toggle.comp_enable: false - toggle.shap_enable: false + toggle.shap_enable: true outs: - path: output/comp/model_comp.parquet hash: md5 - md5: a47965c8cbafb84368f2a21a047bc7f2 + md5: b378f8ae73167478032391c6cdd3bbad size: 501 - path: output/feature_importance/model_feature_importance.parquet hash: md5 - md5: 0188f3467e29797ae416937625e38d98 - size: 8962 + md5: ee1d28a09869e0bcdece028f1d6bfded + size: 8629 - path: output/intermediate/timing/model_timing_interpret.parquet hash: md5 - md5: 609a31970e44ac5a41eca85abef26f44 - size: 2519 + md5: 9e20fc369078d7c83be55eb482618299 + size: 2534 - path: output/shap/model_shap.parquet hash: md5 - md5: a47965c8cbafb84368f2a21a047bc7f2 - size: 501 + md5: af45fd61c45df4873123ce9abe158503 + size: 879608090 finalize: cmd: Rscript pipeline/05-finalize.R deps: - path: output/intermediate/timing/model_timing_assess.parquet hash: md5 - md5: c24ab596c68f5d85bc9840693913592b + md5: a196a4a4e56f22519ec3550a242f0e26 size: 2494 - path: output/intermediate/timing/model_timing_evaluate.parquet hash: md5 - md5: 9b6970f2d37eb392f0749aca46e3cdab + md5: f0f3238577d9bdf835f210871ceffc2f size: 2514 - path: output/intermediate/timing/model_timing_interpret.parquet hash: md5 - md5: 609a31970e44ac5a41eca85abef26f44 - size: 2519 + md5: 9e20fc369078d7c83be55eb482618299 + size: 2534 - path: output/intermediate/timing/model_timing_train.parquet hash: md5 - md5: 718355a0a97646ada3839b423fa6f505 + md5: f3b98cc316e8de429f36b1e2dac2e358 size: 2494 - path: pipeline/05-finalize.R hash: md5 - md5: 69a7cd711d917d38aee5f87a14e29e33 - size: 7867 + md5: 8f007737c0127329ad42afad687a1b80 + size: 7813 params: params.yaml: cv: @@ -769,13 +758,12 @@ stages: uncertain: 8 best_metric: rmse input: - min_sale_year: '2015' - max_sale_year: '2023' + min_sale_year: '2017' + max_sale_year: '2025' n_years_prior: 4 complex: match_exact: - meta_township_code - - meta_class - char_bsmt - char_gar1_size - char_attic_fnsh @@ -790,7 +778,7 @@ stages: model: engine: lightgbm objective: rmse - seed: 2024 + seed: 2026 deterministic: true force_row_wise: true verbose: -1 @@ -856,7 +844,6 @@ stages: - prox_nearest_road_highway_dist_ft - prox_nearest_road_arterial_dist_ft - prox_nearest_road_collector_dist_ft - - prox_nearest_road_highway_daily_traffic - prox_nearest_road_arterial_daily_traffic - prox_nearest_road_collector_daily_traffic - prox_nearest_new_construction_dist_ft @@ -878,8 +865,6 @@ stages: - acs5_median_household_renter_occupied_gross_rent - acs5_percent_household_owner_occupied - other_tax_bill_rate - - other_school_district_elementary_avg_rating - - other_school_district_secondary_avg_rating - time_sale_year - time_sale_day - time_sale_quarter_of_year @@ -990,7 +975,6 @@ stages: - -3 - 2 pv: - multicard_yoy_cap: 2.2 land_pct_of_total_cap: 0.5 round_break: - 1000 @@ -1003,10 +987,10 @@ stages: - 10000 round_type: floor ratio_study: - far_year: '2021' + far_year: '2023' far_stage: board far_column: meta_2yr_pri_board_tot - near_year: '2023' + near_year: '2025' near_stage: certified near_column: meta_certified_tot min_n_sales: 30 @@ -1024,49 +1008,49 @@ stages: - loc_school_elementary_district_geoid - loc_school_secondary_district_geoid - loc_school_unified_district_geoid - run_note: Preparing for 2025 model with 2024 data + run_note: Potential residential baseline with SHAP values, revert DVC path toggle: cv_enable: false - shap_enable: false + shap_enable: true comp_enable: false - upload_enable: false + upload_enable: true outs: - path: output/intermediate/timing/model_timing_finalize.parquet hash: md5 - md5: 1a0ebc384517468a55d9f698e8ce9fe5 + md5: 22027ae58fdc1986c41b9f0a1273d246 size: 2519 - path: output/metadata/model_metadata.parquet hash: md5 - md5: 0f80826aee046eec76ddb1b03733d00b - size: 21462 + md5: 0104542652e89027447fefdbe1939a7f + size: 21011 - path: output/timing/model_timing.parquet hash: md5 - md5: b0b625cc0a3a0d6039a97490974f0bd2 - size: 5123 + md5: 55177cc343b5e0caeadb994b5249381b + size: 5163 - path: reports/performance/performance.html hash: md5 - md5: ffdeaee2060b19a86f67b47fd6801950 - size: 28900191 + md5: bef5a4e88aed64110be341fb9edeaaa3 + size: 43361129 export: cmd: Rscript pipeline/07-export.R deps: - path: pipeline/07-export.R hash: md5 - md5: b4615315b52165eed4a030c94def015b - size: 33718 + md5: bd72f9ee143a584446c2c3d3bc28d35b + size: 35506 params: params.yaml: - assessment.year: '2023' + assessment.year: '2026' export: triad_code: '3' - run_id: 2023-03-14-clever-damani - input.max_sale_year: '2022' - input.min_sale_year: '2014' + run_id: 2026-02-03-compassionate-bowen + input.max_sale_year: '2025' + input.min_sale_year: '2017' ratio_study: - far_year: '2020' + far_year: '2023' far_stage: board far_column: meta_2yr_pri_board_tot - near_year: '2022' + near_year: '2025' near_stage: certified near_column: meta_certified_tot min_n_sales: 30 @@ -1089,77 +1073,77 @@ stages: deps: - path: output/assessment_card/model_assessment_card.parquet hash: md5 - md5: 92b7958bedd436e99cc3c891ad08acc1 - size: 279676461 + md5: a2e78733c6bf3745408cbfdddaf79de3 + size: 279244518 - path: output/assessment_pin/model_assessment_pin.parquet hash: md5 - md5: 45652e2a5f56ba388adee4fbbc0ed097 - size: 113678976 + md5: c6231bdeb6ecd182cd83244589a5b6f6 + size: 111516900 - path: output/comp/model_comp.parquet hash: md5 - md5: a47965c8cbafb84368f2a21a047bc7f2 + md5: b378f8ae73167478032391c6cdd3bbad size: 501 - path: output/feature_importance/model_feature_importance.parquet hash: md5 - md5: 0188f3467e29797ae416937625e38d98 - size: 8962 + md5: ee1d28a09869e0bcdece028f1d6bfded + size: 8629 - path: output/metadata/model_metadata.parquet hash: md5 - md5: 0f80826aee046eec76ddb1b03733d00b - size: 21462 + md5: 0104542652e89027447fefdbe1939a7f + size: 21011 - path: output/parameter_final/model_parameter_final.parquet hash: md5 - md5: 42d5e8030cef68b122116608d206756a + md5: fb992eb468be163cefb9b7e81fc2daa3 size: 6403 - path: output/parameter_range/model_parameter_range.parquet hash: md5 - md5: a47965c8cbafb84368f2a21a047bc7f2 + md5: b378f8ae73167478032391c6cdd3bbad size: 501 - path: output/parameter_search/model_parameter_search.parquet hash: md5 - md5: a47965c8cbafb84368f2a21a047bc7f2 + md5: b378f8ae73167478032391c6cdd3bbad size: 501 - path: output/performance/model_performance_assessment.parquet hash: md5 - md5: b97ebb3ee8b84c449808eaa0461ee328 - size: 2757682 + md5: bfb62b767573d17e288b7cc38cd68132 + size: 3224032 - path: output/performance/model_performance_test.parquet hash: md5 - md5: 74e2066197cb5b081a0ce1824053d6ec - size: 8435404 + md5: 6fd4726e9b4d5686aeb5aaa559650788 + size: 8240751 - path: output/performance_quantile/model_performance_quantile_assessment.parquet hash: md5 - md5: 1b1b40251eaaff638da9a92c9a6d524c - size: 989964 + md5: 252248672a31b8a6b3225d85aa9084e4 + size: 996286 - path: output/performance_quantile/model_performance_quantile_test.parquet hash: md5 - md5: 700d46e105a18f8f50c82fcc9835289a - size: 4886365 + md5: 2d9b186aaa4acb6015cd1f3c5577e4c2 + size: 4867672 - path: output/shap/model_shap.parquet hash: md5 - md5: a47965c8cbafb84368f2a21a047bc7f2 - size: 501 + md5: af45fd61c45df4873123ce9abe158503 + size: 879608090 - path: output/test_card/model_test_card.parquet hash: md5 - md5: 2c849fe0de354d762fb6bd7cc9527b3c - size: 2252194 + md5: 3dcfbe00fdf1b9b43a8efd61eba9cd38 + size: 2051281 - path: output/timing/model_timing.parquet hash: md5 - md5: b0b625cc0a3a0d6039a97490974f0bd2 - size: 5123 + md5: 55177cc343b5e0caeadb994b5249381b + size: 5163 - path: output/workflow/fit/model_workflow_fit.zip hash: md5 - md5: 4f474f2031e275bc6af6fed5ac84cf11 - size: 11819090 + md5: b69d2dcdefe58704b06b84907b32f130 + size: 11729461 - path: output/workflow/recipe/model_workflow_recipe.rds hash: md5 - md5: 94a53f083b777c19b0259fe20b02dc47 - size: 1981143 + md5: 2942309984679ea67a89a3d127fe78cd + size: 43615 - path: pipeline/06-upload.R hash: md5 md5: 0fe374cec43dca64602659213fce24b6 size: 11297 - path: reports/performance/performance.html hash: md5 - md5: ffdeaee2060b19a86f67b47fd6801950 - size: 28900191 + md5: bef5a4e88aed64110be341fb9edeaaa3 + size: 43361129 diff --git a/params.yaml b/params.yaml index 9ca885a81..e5c7dc37c 100644 --- a/params.yaml +++ b/params.yaml @@ -11,11 +11,11 @@ # Model tag used to identify the purpose of the run. Must be one of: # "junk", "rejected", "test", "baseline", "candidate", "final", or "comps" -run_type: "test" +run_type: "baseline" # Note included with each run. Use this to summarize what changed about the run # or add context -run_note: Testing 2026 baseline model with updated parameters +run_note: Potential residential baseline with SHAP values, revert DVC path toggle: # Should the train stage run full cross-validation? Otherwise, the model @@ -27,7 +27,7 @@ toggle: shap_enable: true # Should comps be calculated for this run in the interpret stage? - comp_enable: true + comp_enable: false # Upload all modeling artifacts and results to S3 in the upload stage. Set # to false if you are not a CCAO employee @@ -419,4 +419,4 @@ comp: # upload export: triad_code: "3" - run_id: "2025-02-11-charming-eric" + run_id: "2026-02-03-compassionate-bowen" diff --git a/pipeline/00-ingest.R b/pipeline/00-ingest.R index 13031fd96..008e736ab 100644 --- a/pipeline/00-ingest.R +++ b/pipeline/00-ingest.R @@ -148,16 +148,17 @@ assessment_data <- dbGetQuery( ) tictoc::toc() -# The neighborhood code for pin 30172130150000 is incorrect before 2026. +# The neighborhood codes for these pins are incorrect before 2026. # Unfortunately this leads to a random neighborhood from the North Tri getting # included in reporting and exports for a South Tri modeling year. Because this # is an iasWorld data issue and can't be corrected for 2025 since it's locked -# for editing, we've decided to hard-code this correction into the pipeline for -# now. This code should be removed for assessment year 2027 modeling. +# for editing, we've decided to hard-code these corrections into the pipeline +# for now. This code should be removed for assessment year 2027 modeling. assessment_data <- assessment_data %>% mutate( meta_nbhd_code = case_when( meta_pin == "30172130150000" ~ "37061", + meta_pin == "15024050110000" ~ "33020", TRUE ~ meta_nbhd_code ) ) diff --git a/reports/challenge_groups/challenge_groups.qmd b/reports/challenge_groups/challenge_groups.qmd index b31256fb5..21e61172b 100644 --- a/reports/challenge_groups/challenge_groups.qmd +++ b/reports/challenge_groups/challenge_groups.qmd @@ -20,8 +20,8 @@ knitr: out.width: "100%" editor: source params: - run_id: "2025-02-11-charming-eric" - year: "2025" + run_id: "2026-02-03-compassionate-bowen" + year: "2026" --- ```{r _setup_script} diff --git a/reports/model_features/model_features.qmd b/reports/model_features/model_features.qmd index 18010483f..0b6fff8da 100644 --- a/reports/model_features/model_features.qmd +++ b/reports/model_features/model_features.qmd @@ -11,9 +11,9 @@ format: embed-resources: true params: # The run ID for last year's model (usually the final res model) - comp_run_id: "2024-03-17-stupefied-maya" + comp_run_id: "2025-02-11-charming-eric" # The run ID for the new model run that we want to compare to last year's model - baseline_run_id: "2025-02-11-charming-eric" + baseline_run_id: "2026-02-03-compassionate-bowen" --- {{< include data_changes.qmd >}} diff --git a/reports/performance/performance.qmd b/reports/performance/performance.qmd index 6d80cdfab..9dbd8dfc8 100644 --- a/reports/performance/performance.qmd +++ b/reports/performance/performance.qmd @@ -20,8 +20,8 @@ knitr: out.width: "100%" editor: source params: - run_id: "2025-02-11-charming-eric" - year: "2025" + run_id: "2026-02-03-compassionate-bowen" + year: "2026" --- ```{r _setup_script}