diff --git a/PopSynthesis/DataProcessor/DataProcessor.py b/PopSynthesis/DataProcessor/DataProcessor.py
index 277c66d..7e64c34 100644
--- a/PopSynthesis/DataProcessor/DataProcessor.py
+++ b/PopSynthesis/DataProcessor/DataProcessor.py
@@ -4,25 +4,35 @@
 from pathlib import Path
 from os import PathLike
 from typing import Union
-from PopSynthesis.DataProcessor.utils.const_files import hh_seed_file, pp_seed_file
+from PopSynthesis.DataProcessor.utils.const_files import (
+    hh_seed_file,
+    pp_seed_file,
+    raw_data_dir,
+    processed_data_dir,
+    output_dir,
+)
 from PopSynthesis.DataProcessor.utils.general_utils import find_file
 
+
 class DataProcessorGeneric:
-    def __init__(self, raw_data_src:PathLike[Union[Path, str]], mid_processed_src: PathLike[Union[Path, str]], output_data_src: PathLike[Union[Path, str]]) -> None:
+    def __init__(
+        self,
+        raw_data_src: PathLike[Union[Path, str]],
+        mid_processed_src: PathLike[Union[Path, str]],
+        output_data_src: PathLike[Union[Path, str]],
+    ) -> None:
         self.raw_data_path = Path(raw_data_src)
         self.mid_process_path = Path(mid_processed_src)
         self.output_data_path = Path(output_data_src)
 
     def process_all_seed(self):
         NotImplemented
-        
-    
+
     def process_households_seed(self):
         # Import the hh seed data
         hh_file = find_file(base_path=self.raw_data_path, filename=hh_seed_file)
         print(hh_file)
 
-
     def process_persons_seed(self):
         NotImplemented
 
@@ -34,4 +44,7 @@ def process_households_census(self):
 
     def process_persons_census(self):
         NotImplemented
-        
\ No newline at end of file
+
+
+if __name__ == "__main__":  # manual smoke run with the configured data dirs
+    a = DataProcessorGeneric(raw_data_dir, processed_data_dir, output_dir)
diff --git a/PopSynthesis/DataProcessor/utils/const_files.py b/PopSynthesis/DataProcessor/utils/const_files.py
index 0e8e2ce..89287ce 100644
--- a/PopSynthesis/DataProcessor/utils/const_files.py
+++ b/PopSynthesis/DataProcessor/utils/const_files.py
@@ -1,4 +1,12 @@
 hh_seed_file = "H_VISTA_1220_SA1.csv"
 pp_seed_file = "P_VISTA_1220_SA1.csv"
 seed_loc = "VISA"
-census_loc = "CENSUSS"
\ No newline at end of file
+census_loc = "CENSUSS"
+
+from pathlib import Path
+
+# Resolve data folders relative to the DataProcessor package (parent of utils/)
+_DATA_PROCESSOR_DIR = Path(__file__).resolve().parent.parent
+raw_data_dir = str(_DATA_PROCESSOR_DIR / "data")
+processed_data_dir = str(_DATA_PROCESSOR_DIR / "processed_data")
+output_dir = str(_DATA_PROCESSOR_DIR / "output")
diff --git a/PopSynthesis/DataProcessor/utils/const_process.py b/PopSynthesis/DataProcessor/utils/const_process.py
index ecb1ebf..828f4fe 100644
--- a/PopSynthesis/DataProcessor/utils/const_process.py
+++ b/PopSynthesis/DataProcessor/utils/const_process.py
@@ -8,14 +8,14 @@
 ct = str(ct).replace(".", "-").replace(":", "-").replace(" ", "-")
 
 # create logger
-logging.basicConfig(format='%(asctime)s - %(levelname)s: %(message)s')
-logger = logging.getLogger('process inputs data')
+logging.basicConfig(format="%(asctime)s - %(levelname)s: %(message)s")
+logger = logging.getLogger("process inputs data")
 logger.setLevel(logging.DEBUG)
 # create file handler which logs even debug messages
-fh = logging.FileHandler(os.path.join(log_dir, f'process_data_{ct}.log'))
+fh = logging.FileHandler(os.path.join(log_dir, f"process_data_{ct}.log"))
 fh.setLevel(logging.DEBUG)
 # create formatter and add it to the handlers
-formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
 fh.setFormatter(formatter)
 # add the handlers to the logger
 logger.addHandler(fh)
@@ -43,14 +43,7 @@
     "$8,000 or more ($416,000 or more)",
 ]
 
-HH_ATTS = [
-    "hhid",
-    "dwelltype",
-    "owndwell",
-    "hhinc",
-    "totalvehs",
-    "hhsize"
-]
+HH_ATTS = ["hhid", "dwelltype", "owndwell", "hhinc", "totalvehs", "hhsize"]
 
 PP_ATTS = [
     "persid",
@@ -60,10 +53,15 @@
     "relationship",
     "persinc",
     "nolicence",
-    "anywork"
+    "anywork",
 ]
 
-LS_GR_RELA = ["Self", "Spouse", "Child", "Grandchild"] # For the rest we will make them Others
+LS_GR_RELA = [
+    "Self",
+    "Spouse",
+    "Child",
+    "Grandchild",
+]  # For the rest we will make them Others
 HANDLE_THE_REST_RELA = "Others"
 ALL_RELA = LS_GR_RELA + [HANDLE_THE_REST_RELA]
 NOT_INCLUDED_IN_BN_LEARN = ["hhid", "persid", "relationship"]
diff --git a/PopSynthesis/DataProcessor/utils/general_utils.py b/PopSynthesis/DataProcessor/utils/general_utils.py
index d40edb5..00a126d 100644
--- a/PopSynthesis/DataProcessor/utils/general_utils.py
+++ b/PopSynthesis/DataProcessor/utils/general_utils.py
@@ -6,4 +6,4 @@ def find_file(base_path, filename):
     for file in base_path.rglob(filename):
         if file.is_file():
             return file
-    return None
\ No newline at end of file
+    return None
diff --git a/PopSynthesis/DataProcessor/utils/seed/add_weghts.py b/PopSynthesis/DataProcessor/utils/seed/add_weghts.py
index 3d215cc..0b681b4 100644
--- a/PopSynthesis/DataProcessor/utils/seed/add_weghts.py
+++ b/PopSynthesis/DataProcessor/utils/seed/add_weghts.py
@@ -1,14 +1,20 @@
 import polars as pl
 
+
 def get_weights_dict(hh_df_w: pl.DataFrame, pp_df_w: pl.DataFrame):
     re_dict = {}
     # Process HH weights
-    hh_df_w["_weight"] = hh_df_w["wdhhwgt_sa3"].fillna(0) + hh_df_w["wehhwgt_sa3"].fillna(0)
-    pp_df_w["_weight"] = pp_df_w["wdperswgt_sa3"].fillna(0) + pp_df_w["weperswgt_sa3"].fillna(0)
+    hh_df_w["_weight"] = hh_df_w["wdhhwgt_sa3"].fillna(0) + hh_df_w[
+        "wehhwgt_sa3"
+    ].fillna(0)
+    pp_df_w["_weight"] = pp_df_w["wdperswgt_sa3"].fillna(0) + pp_df_w[
+        "weperswgt_sa3"
+    ].fillna(0)
     re_dict["hh"] = dict(zip(hh_df_w["hhid"], hh_df_w["_weight"]))
     re_dict["pp"] = dict(zip(pp_df_w["persid"], pp_df_w["_weight"]))
     return re_dict
 
+
 def add_weights_in_df(df, weights_dict, type="hh"):
     select_col = None
     dict_check = weights_dict[type]
@@ -17,8 +23,10 @@ def add_weights_in_df(df, weights_dict, type="hh"):
         if len(check_cols) == 0:
             raise ValueError("No HHID to match with the weights")
         else:
-            select_col = check_cols[0] # Don't know there will be mutiple but just incase, will select the first col
-        
+            select_col = check_cols[
+                0
+            ]  # Multiple matches are not expected, but just in case, select the first column
+
     elif type == "pp":
         check_cols = [x for x in df.columns if "persid" in x]
         if len(check_cols) == 0:
@@ -26,11 +34,11 @@ def add_weights_in_df(df, weights_dict, type="hh"):
         elif len(check_cols) == 1:
             select_col = check_cols[0]
         else:
-            pref_val = "persid_main" # We will now use the weights of the main person
+            pref_val = "persid_main"  # We will now use the weights of the main person
             select_col = pref_val if pref_val in check_cols else check_cols[0]
     else:
         raise ValueError("You pick wrong type for dict check")
-    
+
     assert select_col is not None
     df["_weight"] = df.apply(lambda row: dict_check[row[select_col]], axis=1)
-    return df
\ No newline at end of file
+    return df
diff --git a/PopSynthesis/DataProcessor/utils/seed/hh/add_pp_rela_to_hh.py b/PopSynthesis/DataProcessor/utils/seed/hh/add_pp_rela_to_hh.py
index c28922a..eea85d6 100644
--- a/PopSynthesis/DataProcessor/utils/seed/hh/add_pp_rela_to_hh.py
+++ b/PopSynthesis/DataProcessor/utils/seed/hh/add_pp_rela_to_hh.py
@@ -1,4 +1,3 @@
-
 def adding_pp_related_atts(hh_df, pp_df):
     # This adding the persons-related atts to the hh df for later sampling
     # at the moment we will use to have the number of each relationship
@@ -8,15 +7,17 @@ def adding_pp_related_atts(hh_df, pp_df):
     dict_count_rela = {}
     for hhid, rela_gr in zip(gb_df_pp.index, gb_df_pp):
         check_dict = {x: 0 for x in ls_rela}
-        for i in rela_gr: check_dict[i] += 1
+        for i in rela_gr:
+            check_dict[i] += 1
         dict_count_rela[hhid] = check_dict
 
     for rela in ls_rela:
-        hh_df[rela] = hh_df.apply(lambda row: dict_count_rela[row["hhid"]][rela], axis=1)
+        hh_df[rela] = hh_df.apply(
+            lambda row: dict_count_rela[row["hhid"]][rela], axis=1
+        )
 
     # check Self again
     assert len(hh_df["Main"].unique()) == 1
     assert hh_df["Main"].unique()[0] == 1
 
     return hh_df.drop(columns=["Main"])
-
diff --git a/PopSynthesis/DataProcessor/utils/seed/hh/process_general_hh.py b/PopSynthesis/DataProcessor/utils/seed/hh/process_general_hh.py
index 7bf073a..32ce50f 100644
--- a/PopSynthesis/DataProcessor/utils/seed/hh/process_general_hh.py
+++ b/PopSynthesis/DataProcessor/utils/seed/hh/process_general_hh.py
@@ -4,6 +4,7 @@ def convert_veh(row):
             return str(row["totalvehs"])
         else:
             return f"{veh_limit}+"
+
     hh_df["totalvehs"] = hh_df.apply(convert_veh, axis=1)
     return hh_df
 
@@ -13,7 +14,7 @@ def con_inc(row):
         hh_inc = row["hhinc"]
         # Confime hhinc always exist, it's float
         if hh_inc < 0:
-            return "Negative income" #NOTE: None like this but exist in census, need to check whether this can be an issue
+            return "Negative income"  # NOTE: no records like this, but it exists in the census; need to check whether this can be an issue
         elif hh_inc > 0:
             for state in check_states:
                 bool_val = None
@@ -33,14 +34,23 @@ def con_inc(row):
                     return state
         else:
             return "Nil income"
+
     hh_df["hhinc"] = hh_df.apply(con_inc, axis=1)
     return hh_df
 
-def convert_hh_dwell(hh_df): # Removing the occupied rent free
-    hh_df["owndwell"] = hh_df.apply(lambda r: "Something Else" if r["owndwell"] == "Occupied Rent-Free" else r["owndwell"], axis=1)
+
+def convert_hh_dwell(hh_df):  # Removing the occupied rent free
+    hh_df["owndwell"] = hh_df.apply(
+        lambda r: "Something Else"
+        if r["owndwell"] == "Occupied Rent-Free"
+        else r["owndwell"],
+        axis=1,
+    )
     return hh_df
 
 
 def convert_hh_size(hh_df):
-    hh_df["hhsize"] = hh_df.apply(lambda r: "8+" if r["hhsize"] >= 8 else str(r["hhsize"]), axis=1)
-    return hh_df
\ No newline at end of file
+    hh_df["hhsize"] = hh_df.apply(
+        lambda r: "8+" if r["hhsize"] >= 8 else str(r["hhsize"]), axis=1
+    )
+    return hh_df
diff --git a/PopSynthesis/DataProcessor/utils/seed/hh/process_hh_main.py b/PopSynthesis/DataProcessor/utils/seed/hh/process_hh_main.py
index f1db6a9..c9596e5 100644
--- a/PopSynthesis/DataProcessor/utils/seed/hh/process_hh_main.py
+++ b/PopSynthesis/DataProcessor/utils/seed/hh/process_hh_main.py
@@ -1,4 +1,6 @@
-def process_hh_main_person(hh_df, main_pp_df, to_csv=False, name_file="connect_hh_main", include_weights=True):
+def process_hh_main_person(
+    hh_df, main_pp_df, to_csv=False, name_file="connect_hh_main", include_weights=True
+):
     # they need to perfect match
     assert len(hh_df) == len(main_pp_df)
     combine_df = hh_df.merge(main_pp_df, on="hhid", how="inner")
@@ -10,7 +12,7 @@ def process_hh_main_person(hh_df, main_pp_df, to_csv=False, name_file="connect_h
 
     if not include_weights:
         combine_df = combine_df.drop(columns="_weight")
-    
+
     if to_csv:
-        combine_df.to_csv(os.path.join(processed_data ,f"{name_file}.csv"), index=False)
+        combine_df.to_csv(os.path.join(processed_data, f"{name_file}.csv"), index=False)
     return combine_df
diff --git a/PopSynthesis/DataProcessor/utils/seed/pp/convert_age.py b/PopSynthesis/DataProcessor/utils/seed/pp/convert_age.py
index 8904f43..fb927e2 100644
--- a/PopSynthesis/DataProcessor/utils/seed/pp/convert_age.py
+++ b/PopSynthesis/DataProcessor/utils/seed/pp/convert_age.py
@@ -1,16 +1,15 @@
-
 def get_main_max_age(pp_df):
     # add the dummy inc to rank
     ls_hh_id = pp_df["hhid"].unique()
     for hh_id in ls_hh_id:
         print(hh_id)
-        sub_df = pp_df[pp_df["hhid"]==hh_id]
+        sub_df = pp_df[pp_df["hhid"] == hh_id]
         idx_max_age = sub_df["age"].idxmax()
         rela_max_age = sub_df.loc[idx_max_age]["relationship"]
         # CONFIRMED this will be Spouse or Others only
         pp_df.at[idx_max_age, "relationship"] = "Main"
         if rela_max_age != "Self":
-            sub_sub_df = sub_df[sub_df["relationship"]=="Self"]
+            sub_sub_df = sub_df[sub_df["relationship"] == "Self"]
             idx_self = sub_sub_df.index[0]
             pp_df.at[idx_self, "relationship"] = rela_max_age
     return pp_df
@@ -26,13 +25,12 @@ def convert_pp_age_gr(pp_df, range_age=10, age_limit=100):
             new_name = f"{hold_min}-{hold_min+range_age-1}"
         check_dict[i] = new_name
     check_dict["others"] = f"{age_limit}+"
-    
+
     def convert_age(row):
         if row["age"] in check_dict:
             return check_dict[row["age"]]
         else:
             return check_dict["others"]
-        
+
     pp_df["age"] = pp_df.apply(convert_age, axis=1)
     return pp_df
-
diff --git a/PopSynthesis/DataProcessor/utils/seed/pp/convert_inc.py b/PopSynthesis/DataProcessor/utils/seed/pp/convert_inc.py
index f32f989..1fddeab 100644
--- a/PopSynthesis/DataProcessor/utils/seed/pp/convert_inc.py
+++ b/PopSynthesis/DataProcessor/utils/seed/pp/convert_inc.py
@@ -1,4 +1,3 @@
-
 def add_converted_inc(pp_df):
     def process_inc(row):
         r_check = row["persinc"]
@@ -21,6 +20,6 @@ def process_inc(row):
         else:
             raise ValueError(f"Dunno I never seen this lol {r_check}")
         return val
-    
+
     pp_df["inc_dummy"] = pp_df.apply(process_inc, axis=1)
     return pp_df
diff --git a/PopSynthesis/DataProcessor/utils/seed/pp/process_main_others.py b/PopSynthesis/DataProcessor/utils/seed/pp/process_main_others.py
index 52ae16b..d18a851 100644
--- a/PopSynthesis/DataProcessor/utils/seed/pp/process_main_others.py
+++ b/PopSynthesis/DataProcessor/utils/seed/pp/process_main_others.py
@@ -1,10 +1,11 @@
-
 def process_main_other(main_pp_df, sub_df, rela, to_csv=True, include_weights=True):
-    assert len(main_pp_df["relationship"].unique()) == 1 # It is Main
-    assert len(sub_df["relationship"].unique()) == 1 # It is the relationship we checking
+    assert len(main_pp_df["relationship"].unique()) == 1  # It is Main
+    assert (
+        len(sub_df["relationship"].unique()) == 1
+    )  # It is the relationship we are checking
     # Change the name to avoid confusion
-    main_pp_df = main_pp_df.add_suffix('_main', axis=1)
-    sub_df = sub_df.add_suffix(f'_{rela}', axis=1)
+    main_pp_df = main_pp_df.add_suffix("_main", axis=1)
+    sub_df = sub_df.add_suffix(f"_{rela}", axis=1)
     main_pp_df = main_pp_df.rename(columns={"hhid_main": "hhid"})
     sub_df = sub_df.rename(columns={f"hhid_{rela}": "hhid"})
 
@@ -17,9 +18,10 @@ def process_main_other(main_pp_df, sub_df, rela, to_csv=True, include_weights=Tr
 
     if not include_weights:
         combine_df = combine_df.drop(columns="_weight")
-    
+
     if to_csv:
-        combine_df.to_csv(os.path.join(processed_data, f"connect_main_{rela}.csv"), index=False)
-    
-    return combine_df
+        combine_df.to_csv(
+            os.path.join(processed_data, f"connect_main_{rela}.csv"), index=False
+        )
 
+    return combine_df
diff --git a/PopSynthesis/DataProcessor/utils/seed/pp/process_relationships.py b/PopSynthesis/DataProcessor/utils/seed/pp/process_relationships.py
index 25a755a..a305a98 100644
--- a/PopSynthesis/DataProcessor/utils/seed/pp/process_relationships.py
+++ b/PopSynthesis/DataProcessor/utils/seed/pp/process_relationships.py
@@ -1,10 +1,12 @@
 from collections import defaultdict
 from PopSynthesis.Methods.connect_HH_PP.scripts.const import *
 
+
 def check_rela_gb(gb_df):
     for hhid, rela_gr in zip(gb_df.index, gb_df):
         check_dict = defaultdict(lambda: 0)
-        for i in rela_gr: check_dict[i] += 1 
+        for i in rela_gr:
+            check_dict[i] += 1
         if check_dict["Self"] == 0:
             # print(hhid)
             print([f"{x} - {y}" for x, y in check_dict.items() if x != "Self"])
@@ -25,17 +27,18 @@ def process_rela(pp_df):
     ls_to_replace = []
     for hhid, rela_gr in zip(gb_df.index, gb_df):
         check_dict = defaultdict(lambda: 0)
-        for i in rela_gr: check_dict[i] += 1
+        for i in rela_gr:
+            check_dict[i] += 1
         if check_dict["Self"] == 0:
             replace_method = "oldest" if check_dict["Spouse"] == 0 else "spouse"
             ls_to_replace.append((hhid, replace_method))
 
     # start to replace to fix errors
     for hhid, replace_method in ls_to_replace:
-        sub_df = pp_df[pp_df["hhid"]==hhid]
+        sub_df = pp_df[pp_df["hhid"] == hhid]
         idx_to_replace = None
         if replace_method == "spouse":
-            sub_sub_df = sub_df[sub_df["relationship"]=="Spouse"]
+            sub_sub_df = sub_df[sub_df["relationship"] == "Spouse"]
             idx_to_replace = sub_sub_df.index[0]
         elif replace_method == "oldest":
             idx_to_replace = sub_df["age"].idxmax()
@@ -44,10 +47,12 @@ def process_rela(pp_df):
 
     # check again
     gb_df_2 = pp_df.groupby("hhid")["relationship"].apply(lambda x: list(x))
-    check_rela_gb(gb_df_2) # Should print nothing
+    check_rela_gb(gb_df_2)  # Should print nothing
 
     # replace values in columns
-    pp_df.loc[~pp_df["relationship"].isin(LS_GR_RELA), "relationship"] = HANDLE_THE_REST_RELA
+    pp_df.loc[
+        ~pp_df["relationship"].isin(LS_GR_RELA), "relationship"
+    ] = HANDLE_THE_REST_RELA
     # print(pp_df["relationship"].unique())
 
-    return pp_df
\ No newline at end of file
+    return pp_df