Skip to content

temporary testing #190

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions statistical_methods_library/imputation/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,8 @@ def impute(
# --- Validate params ---
if not isinstance(input_df, DataFrame):
raise TypeError("Input is not a DataFrame")

input_df.show(100)
back_data_df.show(100)
link_cols = [forward_link_col, backward_link_col]
if any(link_cols) and not all(link_cols):
raise TypeError("Either all or no link columns must be specified")
Expand Down Expand Up @@ -375,6 +376,9 @@ def calculate_ratios():
# Since we're going to join on to the main df filtering here
# won't cause us to lose grouping as they'll just be filled with
# default ratios.
print("Inside engine::calculate_ratios : filtered_refs :: ")
filtered_refs.show(500)

if link_filter:
ratio_filter_df = prepared_df.join(
filtered_refs, ["ref", "period", "grouping"]
Expand All @@ -392,6 +396,8 @@ def calculate_ratios():
"next_period",
"match",
)
print("Inside engine::calculate_ratios : ratio_filter_df :: just before the actual ratio call::")
ratio_filter_df.show(500)

# Put the values from the current and previous periods for a
# contributor on the same row.
Expand Down Expand Up @@ -431,7 +437,8 @@ def calculate_ratios():
"link_inclusion_previous",
)
)

print("Inside engine::calculate_ratios : ratio_calculation_df :: just before the actual ratio call::")
ratio_calculation_df.show(500)
# Join the grouping ratios onto the input such that each contributor has
# a set of ratios.
fill_values = {}
Expand All @@ -447,7 +454,8 @@ def calculate_ratios():
output_col_mapping.update(result.additional_outputs)

prepared_df = prepared_df.fillna(fill_values)

print("Inside engine::calculate_ratios : prepared_df :: after the actual ratio call::")
prepared_df.show(500)
if link_filter:
prepared_df = prepared_df.join(
ratio_calculation_df.select(
Expand Down Expand Up @@ -804,9 +812,12 @@ def forward_impute_from_construction(df: DataFrame) -> DataFrame:
manual_construction_col and stage == construct_values
):
break
return df.join(prior_period_df, [col("prior_period") < col("period")]).select(
df.sort("ref","period").show(500)

df = df.join(prior_period_df, [col("prior_period") < col("period")]).select(
[
col(k).alias(output_col_mapping[k])
for k in sorted(output_col_mapping.keys() & set(df.columns))
]
)
return df
9 changes: 7 additions & 2 deletions statistical_methods_library/imputation/ratio_calculators.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,10 @@ def mean_of_ratios(
& (lit(include_zeros) | (col("next_output") != lit(0))),
col("next_output"),
).alias("next_output"),
).selectExpr(
)
print("Inside ration_calculator::mean_of_ratios ::df :: before growth caluclation")
df.show(500)
df =df.selectExpr(
"period",
"grouping",
"ref",
Expand Down Expand Up @@ -176,7 +179,9 @@ def mean_of_ratios(
END
END AS growth_backward""",
)

print("Inside ration_calculator::mean_of_ratios ::df :: after growth caluclation")
df.show(500)

if lower_trim is not None:

def lower_bound(c):
Expand Down
157 changes: 157 additions & 0 deletions test_data_for_period0_backdata.txt

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
identifier,date,group,output,other,marker,encoded_status,link_exclusion
1234,"202104","900",10,2,MC,201,N
1235,"202104","100",20,2,FIMC,200,N
1220,"202104","100",20,2,FIMC,200,N
1221,"202104","100",1800,2,R,210,N
1222,"202104","100",250,2,R,211,N
1223,"202104","100",200,2,C,200,N
1224,"202104","100",75,2,FIR,200,N
1225,"202104","100",25,2,FIC,200,N
1226,"202104","100",150,2,R,210,N
1227,"202104",100,210,81,R,210,N
1236,"202104",100,2100,81,R,210,N
1238,"202104",200,30,81,R,210,N
1239,"202104",200,20,81,FIR,200,N
1240,"202104",200,45,91,C,201,N
1241,"202104",200,500,81,FIC,201,N
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
identifier,date,group,output,other,marker
1234,"202104","900",10,2,MC
1235,"202104","100",20,2,FIMC
1220,"202104","100",20,2,FIMC
1221,"202104","100",1800,2,R
1222,"202104","100",250,2,R
1223,"202104","100",200,2,C,
1224,"202104","100",75,2,FIR
1225,"202104","100",25,2,FIC
1226,"202104","100",150,2,R
1227,"202104",100,210,81,R
1236,"202104",100,2100,81,R
1238,"202104",200,30,81,R
1239,"202104",200,20,81,FIR
1240,"202104",200,45,91,C
1241,"202104",200,500,81,FIC
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
identifier,date,group,output,other,marker,link_inclusion_current,link_inclusion_previous,link_inclusion_next
1234,"202104","900",10,2,MC,,,,
1235,"202104","100",20,2,FIMC,,,,
1220,"202104","100",20,2,FIMC,,,,
1221,"202104","100",1800,2,R,true,,,
1222,"202104","100",250,2,R,true,,,
1223,"202104","100",200,2,C,,,,
1224,"202104","100",75,2,FIR,,,,
1225,"202104","100",25,2,FIC,,,,
1226,"202104","100",150,2,R,true,,,
1227,"202104",100,210,81,R,true,,,
1236,"202104",100,2100,81,R,true,,,
1238,"202104",200,30,81,R,true,,,
1239,"202104",200,20,81,FIR,,,,
1240,"202104",200,45,91,C,,,,
1241,"202104",200,500,81,FIC,,,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
identifier,date,group,question,other,manual_construction,encoded_status,link_exclusion
1234,"202105",900,,78,,201,N,
1235,"202105",100,,81,,201,N,
1236,"202105",100,2113,81,,210,N,
1220,"202105",100,20,22,,200,N,
1221,"202105",100,,2,,200,N,
1222,"202105",100,250,22,R,211,N,
1223,"202105",100,,2,250,200,N,
1224,"202105",100,,22,,200,N,
1225,"202105",100,R,2,,200,N,
1226,"202105",100,,22,,210,N,
1227,"202105",100,210,81,,210,N,
1228,"202105",100,,81,,210,N,
1237,"202105",200,,81,3189,200,N,
1238,"202105",200,,81,,200,N,
1239,"202105",200,,81,,200,N,
1240,"202105",200,,81,,200,N,
1241,"202105",200,,81,,200,N,
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
identifier,date,group,output,marker,forward,backward,construction,count_forward,count_backward,count_construction,default_forward,default_backward,default_construction
1234,202105,900,10,FIMC,1,1,1,0,0,0,true,true,true
1235,202105,100,20,FIMC,1,1,26.08641975,0,0,1,true,true,false
1236,202105,100,2113,R,1,1,26.08641975,0,0,1,true,true,false
1237,202105,200,3189,MC,1,1,1,0,0,0,true,true,true
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
identifier,date,group,question,other,manual_construction
identifier,date,group,question,other,manual_construction,
1234,"202105",900,,78,
1235,"202105",100,,81,
1236,"202105",100,2113,81,
Expand Down
Loading
Loading