diff --git a/regexmodel/regexclass.py b/regexmodel/regexclass.py index e326dc9..62134f0 100644 --- a/regexmodel/regexclass.py +++ b/regexmodel/regexclass.py @@ -260,6 +260,7 @@ def get_candidates(cls, series: pl.Series, else: score_sub = 0 regex = cls() + next_series_sub = next_series_full if score_full >= score_sub: if score_full > 0: @@ -324,7 +325,7 @@ def score(series: pl.Series, regex: BaseRegex, count_thres: int, fraction_match = next_not_null/cur_not_null fraction_cover = n_unique/regex.n_possible - expected_finish = fraction_match**avg_len_next*next_not_null + expected_finish = fraction_match**avg_len_next*next_not_null # type: ignore expected_finish = max(1e-12, expected_finish) split_penalty = 1/(1 + count_thres/expected_finish) cur_score = regex.subrange_penalty*split_penalty*fraction_cover*fraction_match