Skip to content

Commit

Permalink
stabilize normalization results by modifying sampling process
Browse files Browse the repository at this point in the history
  • Loading branch information
leahincom committed Aug 23, 2021
1 parent 8ac6444 commit 1a69150
Showing 1 changed file with 13 additions and 2 deletions.
15 changes: 13 additions & 2 deletions featuretools/mkfeat/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,20 @@
import autonormalize as an


def normalize(df: DataFrame, key_colname):
def sampling(df: DataFrame, n: int = 1000, random_state: int = 0) -> DataFrame:
    """Return a reproducible down-sample of *df* with at most *n* rows.

    Frames that already have *n* rows or fewer are returned unchanged (the
    same object, not a copy). Larger frames are reduced to exactly *n* rows
    drawn without replacement.

    Args:
        df: The frame to down-sample.
        n: Maximum number of rows to keep.
        random_state: Seed passed to ``DataFrame.sample``. A fixed seed makes
            repeated calls on the same frame pick the same rows, so whatever
            is computed from the sample is stable between runs.

    Returns:
        ``df`` itself when ``len(df) <= n``, otherwise a new frame holding
        ``n`` of its rows with their original index labels.
    """
    if len(df) <= n:
        return df
    # One seeded draw is used instead of combining several random draws with
    # ``&``: on DataFrames ``&`` is an element-wise bitwise AND aligned on the
    # index, which neither reduces the row count nor works on float or string
    # columns.
    return df.sample(n=n, random_state=random_state)


def normalize(df: DataFrame, key_colname):
df = sampling(df)
es = an.auto_entityset(df, index=key_colname, accuracy=0.98)

norminfos = []
Expand Down

0 comments on commit 1a69150

Please sign in to comment.