Skip to content

Commit

Permalink
Merge pull request #269 from microsoft/encode-nominals
Browse files Browse the repository at this point in the history
Encode categorical nominal variables as integers in the discover DECI/NOTEARS/PC backends
  • Loading branch information
rracanicci authored Nov 1, 2022
2 parents da0379b + 714cd71 commit f3ec46b
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 0 deletions.
10 changes: 10 additions & 0 deletions python/backend/backend/discover/algorithms/commons/base_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,16 @@ def _remove_rows_with_missing_values(self):
self._number_of_rows - self._prepared_data.shape[0]
)

def _transform_categorical_nominal_to_continuous(self):
    """Replace each categorical-nominal column of the prepared data with integer codes.

    Walks the columns of ``self._prepared_data`` and, for every column whose
    entry in ``self._nature_by_variable`` equals
    ``CausalVariableNature.CategoricalNominal``, overwrites the column in
    place with the codes produced by ``pd.factorize``. Columns of any other
    nature are left untouched.

    Despite the method name, the resulting values are integer labels (as the
    log message states), not a genuinely continuous quantity.
    NOTE(review): ``pd.factorize`` labels missing values with -1 by default;
    this presumably runs after rows with missing values were dropped --
    confirm against the callers.
    """
    # TODO: remove this once categorical values are properly handled by each algorithm
    for name in self._prepared_data.columns:
        is_nominal = (
            self._nature_by_variable[name]
            == CausalVariableNature.CategoricalNominal
        )
        if not is_nominal:
            continue

        logging.info(f"encoding categorical nominal column {name} to integers")
        # factorize returns (codes, uniques); only the codes are kept.
        codes, _uniques = pd.factorize(self._prepared_data[name])
        self._prepared_data[name] = codes

def _prepare_data(self):
self._prepared_data = pd.DataFrame.from_dict(self._dataset_data)
self._remove_rows_with_missing_values()
Expand Down
1 change: 1 addition & 0 deletions python/backend/backend/discover/algorithms/deci.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def __init__(self, p: DeciPayload, progress_callback: ProgressCallback = None):
self._is_dag = None

def _build_causica_dataset(self) -> Dataset:
self._transform_categorical_nominal_to_continuous()
numpy_data = self._prepared_data.to_numpy()
data_mask = np.ones(numpy_data.shape)

Expand Down
2 changes: 2 additions & 0 deletions python/backend/backend/discover/algorithms/notears.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ def __init__(self, p: NotearsPayload, progress_callback: ProgressCallback = None
super().__init__(p, progress_callback)

def do_causal_discovery(self) -> CausalGraph:
self._transform_categorical_nominal_to_continuous()

notears_graph = from_pandas(
self._prepared_data,
tabu_child_nodes=self._constraints.causes,
Expand Down
2 changes: 2 additions & 0 deletions python/backend/backend/discover/algorithms/pc.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ def __init__(self, p: PCPayload, progress_callback: ProgressCallback = None):
super().__init__(p, progress_callback)

def do_causal_discovery(self) -> CausalGraph:
self._transform_categorical_nominal_to_continuous()

n = PC(alpha=0.2)
n.learn(self._prepared_data.to_numpy())
graph_gc = networkx.DiGraph(n.causal_matrix)
Expand Down

0 comments on commit f3ec46b

Please sign in to comment.