Skip to content

[#3] feature들의 name 및 크기 정보를 반환하는 함수 구현 #9

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Nov 18, 2024
54 changes: 52 additions & 2 deletions CATS/inputs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from collections import namedtuple
from typing import Literal
from collections import OrderedDict, namedtuple
from typing import List, Literal, Union

DEFAULT_GROUP_NAME = "default_group"

Expand Down Expand Up @@ -138,3 +138,53 @@ def __hash__(self):
:return: self.name's hash
"""
return self.name.__hash__()


def get_feature_names(feature_columns: List[Union[SparseFeat, DenseFeat, VarLenSparseFeat]]) -> list:
    """
    Return the names of all input slots described by the given feature columns.
    The names are the keys of the dictionary produced by build_input_features, in the order they were inserted.
    :param feature_columns: list of feature instances (SparseFeat, DenseFeat, VarLenSparseFeat)
    :return: list of the feature dictionary's keys
    :raises ValueError: if feature_columns is None or is not a list
    :raises TypeError: if any element is not a SparseFeat, DenseFeat or VarLenSparseFeat
    """
    # None gets its own message before the generic "not a list" check.
    if feature_columns is None:
        raise ValueError("feature_columns is None. feature_columns must be list")
    if not isinstance(feature_columns, list):
        raise ValueError(f"feature_columns is {type(feature_columns)}, feature_columns must be list.")

    # Reject the whole list as soon as one element has an unsupported type.
    if any(not isinstance(column, (SparseFeat, DenseFeat, VarLenSparseFeat)) for column in feature_columns):
        raise TypeError(
            "All elements in feature_columns must be instances of SparseFeat, DenseFeat or VarLenSparseFeat.")

    # Iterating a dictionary yields its keys in insertion order.
    return list(build_input_features(feature_columns))


def build_input_features(feature_columns: List[Union[SparseFeat, DenseFeat, VarLenSparseFeat]]) -> dict:
    """
    Build an ordered dictionary mapping each feature name to its (start, end) index pair.
    The pairs are laid out back to back: every feature starts where the previous one ended, so the model can
    identify the location of each feature in the input data. A SparseFeat takes 1 slot, a DenseFeat takes
    `dimension` slots and a VarLenSparseFeat takes `maxlen` slots, plus 1 extra slot registered under
    `length_name` when that name is set and not already present.
    A feature whose name is already in the dictionary is skipped.
    :param feature_columns: list of feature instances (SparseFeat, DenseFeat, VarLenSparseFeat)
    :return: OrderedDict of feature name -> (start index, end index)
    :raises TypeError: if an element is not a SparseFeat, DenseFeat or VarLenSparseFeat
    """
    index_map = OrderedDict()
    offset = 0

    for column in feature_columns:
        name = column.name
        # The first occurrence of a name wins; later duplicates consume no slots.
        if name in index_map:
            continue

        # Work out how many slots this column takes, and whether it carries a companion length entry.
        length_key = None
        if isinstance(column, SparseFeat):
            width = 1
        elif isinstance(column, DenseFeat):
            width = column.dimension
        elif isinstance(column, VarLenSparseFeat):
            width = column.maxlen
            length_key = column.length_name
        else:
            raise TypeError(f"Invalid feature column type, got {type(column)}")

        end = offset + width
        index_map[name] = (offset, end)
        offset = end

        # The length entry is checked after the feature itself has been registered.
        if length_key is not None and length_key not in index_map:
            index_map[length_key] = (offset, offset + 1)
            offset += 1

    return index_map

Loading