diff --git a/dowhy/causal_estimators/regression_estimator.py b/dowhy/causal_estimators/regression_estimator.py
index d0394c86c6..f7c86fb357 100644
--- a/dowhy/causal_estimators/regression_estimator.py
+++ b/dowhy/causal_estimators/regression_estimator.py
@@ -5,6 +5,7 @@
 import statsmodels.api as sm
 
 from dowhy.causal_estimator import CausalEstimate, CausalEstimator, IdentifiedEstimand
+from dowhy.utils.encoding import one_hot_encode
 
 
 class RegressionEstimator(CausalEstimator):
@@ -70,6 +71,53 @@ def __init__(
 
         self.model = None
 
+        # Data encoders
+        # If encoder_drop_first is True, the first category of each categorical variable does not get
+        # its own indicator column in the one-hot encoding; it is represented implicitly by zeros in
+        # the columns for the remaining categories.
+        # Set it to False to include an indicator column for every category of every categorical variable.
+        self.encoder_drop_first = True
+        self.reset_encoders()
+
+    def reset_encoders(self):
+        """
+        Removes any reference to data encoders, causing them to be re-created on the next `fit()`.
+
+        It is important that data is encoded consistently; otherwise, models will produce
+        inconsistent output. In particular, categorical variables are one-hot encoded, and the
+        mapping of original data values to indicator columns must be identical between model
+        training/fitting and inference time.
+
+        Encoders are reset when `fit()` is called again, as the data is assumed to have changed.
+
+        A separate encoder is used for each subset of variables (treatment, common causes and
+        effect modifiers).
+        """
+        self._encoders = {
+            "treatment": None,
+            "observed_common_causes": None,
+            "effect_modifiers": None,
+        }
+
+    def _encode(self, data: pd.DataFrame, encoder_name: str):
+        """
+        Encodes categorical columns in the given data, returning a new dataframe in which numerical
+        columns are unchanged and categorical columns are replaced by their one-hot encodings.
+        `encoder_name` identifies the specific encoder to use; it is reused if available and
+        created if not, so that subsequent calls encode data consistently.
+
+        :param data: Data to encode.
+        :param encoder_name: The name of the encoder to use.
+        :returns: The encoded data.
+        """
+        existing_encoder = self._encoders.get(encoder_name)
+        encoded_variables, encoder = one_hot_encode(
+            data,
+            drop_first=self.encoder_drop_first,
+            encoder=existing_encoder,
+        )
+
+        # Remember the encoder for reuse
+        self._encoders[encoder_name] = encoder
+        return encoded_variables
+
     def fit(
         self,
         data: pd.DataFrame,
@@ -84,13 +132,14 @@ def fit(
             effects, or return a heterogeneous effect function. Not all
             methods support this currently.
""" + self.reset_encoders() # Forget any existing encoders self._set_effect_modifiers(data, effect_modifier_names) self.logger.debug("Back-door variables used:" + ",".join(self._target_estimand.get_backdoor_variables())) self._observed_common_causes_names = self._target_estimand.get_backdoor_variables() if len(self._observed_common_causes_names) > 0: self._observed_common_causes = data[self._observed_common_causes_names] - self._observed_common_causes = pd.get_dummies(self._observed_common_causes, drop_first=True) + self._observed_common_causes = self._encode(self._observed_common_causes, "observed_common_causes") else: self._observed_common_causes = None @@ -148,14 +197,42 @@ def _estimate_effect_fn(self, data_df): est = self.estimate_effect(data=data_df, need_conditional_estimates=False) return est.value + def _set_effect_modifiers(self, data: pd.DataFrame, effect_modifier_names: Optional[List[str]] = None): + """Sets the effect modifiers for the estimator + Modifies need_conditional_estimates accordingly to effect modifiers value + :param effect_modifiers: Variables on which to compute separate + effects, or return a heterogeneous effect function. Not all + methods support this currently. + """ + self._effect_modifiers = effect_modifier_names + if effect_modifier_names is not None: + self._effect_modifier_names = [cname for cname in effect_modifier_names if cname in data.columns] + if len(self._effect_modifier_names) > 0: + self._effect_modifiers = data[self._effect_modifier_names] + self._effect_modifiers = self._encode(self._effect_modifiers, "effect_modifiers") + self.logger.debug("Effect modifiers: " + ",".join(self._effect_modifier_names)) + else: + self._effect_modifier_names = [] + else: + self._effect_modifier_names = [] + + self.need_conditional_estimates = ( + self.need_conditional_estimates + if self.need_conditional_estimates != "auto" + else (self._effect_modifier_names and len(self._effect_modifier_names) > 0) + ) + def _build_features(self, data_df: pd.DataFrame, treatment_values=None): - treatment_vals = pd.get_dummies(data_df[self._target_estimand.treatment_variable], drop_first=True) + treatment_vals = self._encode(data_df[self._target_estimand.treatment_variable], "treatment") + if len(self._observed_common_causes_names) > 0: observed_common_causes_vals = data_df[self._observed_common_causes_names] - observed_common_causes_vals = pd.get_dummies(observed_common_causes_vals, drop_first=True) + observed_common_causes_vals = self._encode(observed_common_causes_vals, "observed_common_causes") + if self._effect_modifier_names: effect_modifiers_vals = data_df[self._effect_modifier_names] - effect_modifiers_vals = pd.get_dummies(effect_modifiers_vals, drop_first=True) + effect_modifiers_vals = self._encode(effect_modifiers_vals, "effect_modifiers") + # Fixing treatment value to the specified value, if provided if treatment_values is not None: treatment_vals = treatment_values @@ -164,6 +241,7 @@ def _build_features(self, data_df: pd.DataFrame, treatment_values=None): # treatment_vals and data_df should have same number of rows if treatment_vals.shape[0] != data_df.shape[0]: raise ValueError("Provided treatment values and dataframe should have the same length.") + # Bulding the feature matrix n_treatment_cols = 1 if len(treatment_vals.shape) == 1 else treatment_vals.shape[1] n_samples = treatment_vals.shape[0] @@ -195,32 +273,25 @@ def interventional_outcomes(self, data_df: pd.DataFrame, treatment_val): """ if data_df is None: - data_df = self._data + data_df = 
+        else:
+            data_df = data_df.copy()  # don't modify the caller's argument
+
+        # Replace treatment values with the value supplied; note: don't change the column datatype!
+        original_type = data_df[self._target_estimand.treatment_variable].dtypes
+        data_df[self._target_estimand.treatment_variable] = treatment_val
+        data_df[self._target_estimand.treatment_variable] = data_df[self._target_estimand.treatment_variable].astype(
+            original_type, copy=False
+        )
+
+        return self.predict(data_df)
+
+    def predict(self, data_df):
         if not self.model:
             # The model is always built on the entire data
-            _, self.model = self._build_model(data_df)
-        # Replacing treatment values by given x
-        # First, create interventional tensor in original space
-        interventional_treatment_values = np.full(
-            (data_df.shape[0], len(self._target_estimand.treatment_variable)), treatment_val
-        )
-        # Then, use pandas to ensure that the dummies are assigned correctly for a categorical treatment
-        interventional_treatment_2d = pd.concat(
-            [
-                data_df[self._target_estimand.treatment_variable].copy(),
-                pd.DataFrame(
-                    data=interventional_treatment_values,
-                    columns=data_df[self._target_estimand.treatment_variable].columns,
-                ),
-            ],
-            axis=0,
-        ).astype(data_df[self._target_estimand.treatment_variable].dtypes, copy=False)
-        interventional_treatment_2d = pd.get_dummies(interventional_treatment_2d, drop_first=True)
-        interventional_treatment_2d = interventional_treatment_2d[
-            data_df[self._target_estimand.treatment_variable].shape[0] :
-        ]
-
-        new_features = self._build_features(data_df, treatment_values=interventional_treatment_2d)
+            _, self.model = self._build_model()
+
+        new_features = self._build_features(data_df=data_df)
         interventional_outcomes = self.predict_fn(data_df, self.model, new_features)
         return interventional_outcomes
diff --git a/dowhy/utils/encoding.py b/dowhy/utils/encoding.py
new file mode 100644
index 0000000000..722e1473ae
--- /dev/null
+++ b/dowhy/utils/encoding.py
@@ -0,0 +1,62 @@
+import pandas as pd
+from pandas.core.dtypes.common import is_list_like
+from sklearn.preprocessing import OneHotEncoder
+
+
+def one_hot_encode(data: pd.DataFrame, columns=None, drop_first: bool = False, encoder: OneHotEncoder = None):
+    """
+    Replaces pandas' get_dummies with an implementation based on sklearn.preprocessing.OneHotEncoder.
+
+    The purpose of the replacement is to allow new data to be encoded with the same encoder,
+    which ensures that the resulting encodings are consistent.
+
+    If encoder is None, a new instance of sklearn.preprocessing.OneHotEncoder will be created and
+    fitted via `fit_transform()`. Otherwise, the existing encoder is reused via `transform()`.
+
+    For compatibility with get_dummies, the encoded data will be transformed into a DataFrame.
+
+    In all cases, the return value will be the encoded data and the encoder object (even if it was
+    passed in). If `data` contains columns other than the dummy-coded one(s), these will be
+    prepended, unaltered, to the result.
+
+    :param data: Data of which to get dummy indicators.
+    :param columns: List-like structure containing specific columns to encode.
+    :param drop_first: Whether to get k-1 dummies out of k categorical levels by removing the first level.
+    :param encoder: Optional pre-fitted OneHotEncoder to reuse; if None, a new encoder is created and fitted.
+    :return: DataFrame, OneHotEncoder
+    """
+
+    # Determine columns being encoded
+    if columns is None:
+        dtypes_to_encode = ["object", "string", "category"]
+        data_to_encode = data.select_dtypes(include=dtypes_to_encode)
+    elif not is_list_like(columns):
+        raise TypeError("Input must be a list-like for parameter `columns`")
+    else:
+        data_to_encode = data[columns]
+
+    # If all columns are already numerical, there may be nothing to encode.
+    # In this case, return original data.
+    if len(data_to_encode.columns) == 0:
+        return data, encoder  # Encoder may be None
+
+    # Columns to keep in the result - not encoded.
+    columns_to_keep = data.columns.difference(data_to_encode.columns)
+    df_columns_to_keep = data[columns_to_keep].reset_index(drop=True)
+
+    if encoder is None:  # Create new encoder
+        drop = None
+        if drop_first:
+            drop = "first"
+        encoder = OneHotEncoder(drop=drop, sparse=False)  # NB sparse renamed to sparse_output in sklearn 1.2+
+
+        encoded_data = encoder.fit_transform(data_to_encode)
+
+    else:  # Use existing encoder
+        encoded_data = encoder.transform(data_to_encode)
+
+    # Convert the encoded data to a DataFrame
+    columns_encoded = encoder.get_feature_names_out(data_to_encode.columns)
+
+    df_encoded = pd.DataFrame(encoded_data, columns=columns_encoded).reset_index(drop=True)  # drop index from original
+
+    # Concatenate the encoded DataFrame with the original non-categorical columns
+    df_result = pd.concat([df_columns_to_keep, df_encoded], axis=1)
+
+    return df_result, encoder
diff --git a/tests/utils/test_encoding.py b/tests/utils/test_encoding.py
new file mode 100644
index 0000000000..3fb69062f7
--- /dev/null
+++ b/tests/utils/test_encoding.py
@@ -0,0 +1,87 @@
+import pandas as pd
+from pytest import approx
+
+from dowhy.utils.encoding import one_hot_encode
+
+
+def test_one_hot_encode_equivalent_to_get_dummies():
+
+    # Use a mix of already-numeric columns and columns that require encoding:
+    data = {
+        "C": ["X", "Y", "Z", "X", "Y", "Z"],
+        "N": [1, 2, 3, 4, 5, 6],
+    }
+    df = pd.DataFrame(data)
+
+    # NB: There may be small differences in dtype, but since all values will be used in models
+    # as floats, the comparison is done as float.
+    df_dummies = pd.get_dummies(df, drop_first=True)
+    df_dummies = df_dummies.astype(float)
+
+    df_sklearn, _ = one_hot_encode(df, drop_first=True)
+    df_sklearn = df_sklearn.astype(float)
+
+    # Check same number of rows
+    len1 = len(df_dummies)
+    len2 = len(df_sklearn)
+    assert len1 == len2
+
+    # Check same number of cols
+    len1 = len(df_dummies.columns)
+    len2 = len(df_sklearn.columns)
+    assert len1 == len2
+
+    # Check values
+    # Calculate the sum of absolute differences between the two DataFrames
+    # - should be zero (excl. floating point error)
+    sum_abs_diff = (df_dummies - df_sklearn).abs().sum().sum()
+    assert sum_abs_diff == approx(0.0)
+
+
+def test_one_hot_encode_consistent_with_new_data():
+
+    # Use a mix of already-numeric columns and columns that require encoding:
+    data1 = {
+        "C": ["X", "Y", "Z", "X", "Y", "Z"],
+        "N": [1, 2, 3, 4, 5, 6],
+    }
+    df1 = pd.DataFrame(data1)
+
+    # Initial encode
+    df_encoded1, encoder = one_hot_encode(df1, drop_first=True)
+    df_encoded1 = df_encoded1.astype(float)
+
+    # Create new data with permuted rows.
+    # Output shape should be unchanged.
+    data2 = {
+        "C": ["Y", "Z", "X", "X", "Y", "Z"],
+        "N": [1, 2, 3, 4, 5, 6],
+    }
+    df2 = pd.DataFrame(data2)
+
+    # Encode this new data with the same encoder.
+    df_encoded2, _ = one_hot_encode(df2, encoder=encoder, drop_first=True)
+    df_encoded2 = df_encoded2.astype(float)
+
+    # Check same number of rows
+    len1 = len(df_encoded1)
+    len2 = len(df_encoded2)
+    assert len1 == len2
+
+    # Check same number of cols
+    len1 = len(df_encoded1.columns)
+    len2 = len(df_encoded2.columns)
+    assert len1 == len2
+
+    # Check permuted values are consistent
+    c_y1 = df_encoded1["C_Y"]
+    c_y2 = df_encoded2["C_Y"]
+    assert c_y1[1] == c_y2[0]
+    assert c_y1[4] == c_y2[4]
+
+    c_z1 = df_encoded1["C_Z"]
+    c_z2 = df_encoded2["C_Z"]
+    assert c_z1[2] == c_z2[1]
+    assert c_z1[5] == c_z2[5]
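
Usage note (not part of the diff): a minimal sketch of the fit-then-reuse pattern that `one_hot_encode` enables. The dataframes and column names below are illustrative only, and the sketch assumes an sklearn version that still accepts the `sparse=` argument noted in the comment above.

import pandas as pd

from dowhy.utils.encoding import one_hot_encode

# Fit time: encode the training data and keep the fitted encoder.
train = pd.DataFrame({"C": ["X", "Y", "Z"], "N": [1.0, 2.0, 3.0]})
train_encoded, encoder = one_hot_encode(train, drop_first=True)

# Inference time: reuse the same encoder so that the category-to-column
# mapping (and column order) match the training-time encoding exactly.
new = pd.DataFrame({"C": ["Z", "X", "Y"], "N": [4.0, 5.0, 6.0]})
new_encoded, _ = one_hot_encode(new, encoder=encoder, drop_first=True)

assert list(train_encoded.columns) == list(new_encoded.columns)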