Skip to content

Commit

Permalink
fix cleaning of missing columns
Browse files (browse the repository at this point in the history)
  • Loading branch information
patrick-troy committed Sep 20, 2024
1 parent d507672 commit dbaf473
Show file tree
Hide file tree
Showing 7 changed files with 13 additions and 9 deletions.
2 changes: 1 addition & 1 deletion liiatools/annex_a_pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,4 +137,4 @@ def process_session(source_fs: FS, output_fs: FS, la_code: str):
for report in ["PAN"]:
report_data = prepare_export(current_data, pipeline_config, profile=report)
report_folder = export_folder.makedirs(report, recreate=True)
report_data.data.export(report_folder, "annex_a", "xlsx")
report_data.export(report_folder, "annex_a", "xlsx")
2 changes: 1 addition & 1 deletion liiatools/cin_census_pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def process_session(source_fs: FS, output_fs: FS, la_code: str):
for report in ["PAN"]:
report_data = prepare_export(current_data, pipeline_config, profile=report)
report_folder = export_folder.makedirs(report, recreate=True)
report_data.data.export(report_folder, "cin_census_", "csv")
report_data.export(report_folder, "cin_census_", "csv")

# Run report analysis
# analysis_data = report_data.data["CIN"]
Expand Down
4 changes: 2 additions & 2 deletions liiatools/common/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def degrade_data(

def prepare_export(
data: DataContainer, config: PipelineConfig, profile: str = None
) -> ProcessResult:
) -> DataContainer:
"""
Prepare data for export by removing columns that are not required for the given profile
or for all configured tables if no profile is given.
Expand Down Expand Up @@ -145,4 +145,4 @@ def prepare_export(
# Return the subset
data_container[table_name] = table[table_columns].copy()

return ProcessResult(data=data_container, errors=None)
return data_container
4 changes: 2 additions & 2 deletions liiatools/csww_pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,9 +196,9 @@ def process_session(source_fs: FS, output_fs: FS, la_code: str, public_fs: FileL
for report in ["PAN"]:
report_data = prepare_export(current_data, pipeline_config, profile=report)
report_folder = export_folder.makedirs(report, recreate=True)
report_data.data.export(report_folder, "csww_", "csv")
report_data.export(report_folder, "csww_", "csv")

# Run MET analysis
met_data = met_analysis(report_data.data["Worker"], public_fs)
met_data = met_analysis(report_data["Worker"], public_fs)
met_folder = export_folder.makedirs("MET", recreate=True)
met_data.export(met_folder, "csww_", "csv")
2 changes: 1 addition & 1 deletion liiatools/s251_pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,4 +168,4 @@ def process_session(source_fs: FS, output_fs: FS, la_code: str):
for report in ["PAN"]:
report_data = prepare_export(current_data, pipeline_config, profile=report)
report_folder = export_folder.makedirs(report, recreate=True)
report_data.data.export(report_folder, "s251_", "csv")
report_data.export(report_folder, "s251_", "csv")
6 changes: 5 additions & 1 deletion liiatools/school_census_pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ def process_file(
)
return ProcessResult(data=None, errors=errors)

cleanfile_result.data = prepare_export(
cleanfile_result.data, pipeline_config, profile="PAN"
)

# Export the cleaned data to the session 'cleaned' folder
cleanfile_result.data.export(
session_folder, f"{SessionNames.CLEANED_FOLDER}/{uuid}_", "parquet"
Expand Down Expand Up @@ -146,4 +150,4 @@ def process_session(source_fs: FS, output_fs: FS, la_code: str, filename: str):
for report in ["PAN"]:
report_data = prepare_export(current_data, pipeline_config, profile=report)
report_folder = export_folder.makedirs(report, recreate=True)
report_data.data.export(report_folder, "school_census_", "csv")
report_data.export(report_folder, "school_census_", "csv")
2 changes: 1 addition & 1 deletion liiatools/ssda903_pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,4 +130,4 @@ def process_session(source_fs: FS, output_fs: FS, la_code: str):
for report in ["PAN", "SUFFICIENCY"]:
report_data = prepare_export(current_data, pipeline_config, profile=report)
report_folder = export_folder.makedirs(report, recreate=True)
report_data.data.export(report_folder, "ssda903_", "csv")
report_data.export(report_folder, "ssda903_", "csv")

0 comments on commit dbaf473

Please sign in to comment.