def sampling(df: DataFrame, *, sample_size: int = 1000, min_rounds: int = 5) -> DataFrame:
    """Return a random row subset of *df* built from repeated samples.

    Frames with at most ``sample_size`` rows are returned unchanged (the
    very same object).  For larger frames, ``max(len(df) // sample_size,
    min_rounds)`` independent samples of ``sample_size`` rows are drawn with
    ``DataFrame.sample`` and every row whose index label was drawn at least
    once is kept.  Rows are returned in their original order with their
    original values.

    Args:
        df: Frame to down-sample.
        sample_size: Number of rows drawn per round; also the row-count
            threshold above which sampling kicks in.
        min_rounds: Lower bound on the number of sampling rounds.

    Returns:
        ``df`` itself when it is small enough, otherwise a new frame holding
        the union of the sampled rows.

    Raises:
        ValueError: If ``sample_size`` is smaller than 1.
    """
    if sample_size < 1:
        raise ValueError(f"sample_size must be >= 1, got {sample_size}")

    n_rows = len(df)
    if n_rows <= sample_size:
        return df

    rounds = max(n_rows // sample_size, min_rounds)

    # Collect the index labels picked in each round.  The samples must be
    # combined as *row sets*: the `&` operator on DataFrames is an
    # element-wise, index-aligned logical AND and would neither drop rows nor
    # preserve the cell values.
    picked = set()
    for _ in range(rounds):
        picked.update(df.sample(n=sample_size).index)

    # Boolean mask keeps the original row order.  NOTE: if the index holds
    # duplicate labels, all rows sharing a picked label are selected together.
    return df.loc[df.index.isin(picked)]