Skip to content

Commit

Permalink
Readme + correction Ordinal encode
Browse files Browse the repository at this point in the history
  • Loading branch information
luciledierckx committed Apr 20, 2023
1 parent 0d63674 commit 2985632
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 14 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ This work was achieved by Lucile Dierckx, Rosana Veroneze and Siegfried Nijssen

The neural network can be fully translated to an ordered list of decision rules once trained.

To train and test the model, run the *train_and_test_model.py* or *train_and_test_model_ML.py* file.
To train and test the model, run the *train_and_test_model.py* file for binary or multi-class classification, and run the *train_and_test_model_ML.py* file for multi-label classification.

### Dependencies
1. numpy
2. pandas
3. scitkit-learn
3. scikit-learn
4. pytorch


Expand All @@ -21,8 +21,8 @@ In case you want to use our work as part of your research please consider citing
author="Dierckx, Lucile and Veroneze, Rosana and Nijssen, Siegfried",
editor="Kashima, Hisashi and Peng, Wen-Chih and Ide, Tsuyoshi",
title="RL-Net: Interpretable Rule Learning with Neural Networks",
booktitle="Advances in Knowledge Discovery and Data Mining",
booktitle="PAKDD 2023: Advances in Knowledge Discovery and Data Mining",
year="2023",
publisher="Springer International Publishing"
}
```
```
13 changes: 5 additions & 8 deletions dataset_processing.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from datasets.FeatureBinarizer import FeatureBinarizer
from sklearn.model_selection import train_test_split

Expand Down Expand Up @@ -59,9 +59,6 @@ def load_and_transform_data(dataset, doFeatureBinarizer=True, doOrdinalEncode=Fa
elif dataset == "drive":
ds = pd.read_csv(ds_folder+"Drive/Sensorless_drive_diagnosis.txt", header=None, skipinitialspace=True, sep=' ')
lb = ds.iloc[:,48]; ds = ds.iloc[:,:48]
elif dataset == "covtype":
ds = pd.read_csv(ds_folder+"Covtype/covtype.data", header=None, skipinitialspace=True)
lb = ds.iloc[:,54]; ds = ds.iloc[:,:54]
elif dataset == "yeast":
ds = pd.read_csv(ds_folder+"Yeast/yeast-train.csv", header=0, skipinitialspace=True)
lb = ds.iloc[:,-14:]; ds = ds.iloc[:,:-14]
Expand Down Expand Up @@ -112,9 +109,9 @@ def load_and_transform_data(dataset, doFeatureBinarizer=True, doOrdinalEncode=Fa
X_tst = fb.transform(X_tst)

elif doOrdinalEncode:
enc = OrdinalEncoder()
X[categorical_cols] = enc.fit_transform(X[categorical_cols])
X_tst[categorical_cols] = enc.transform(X_tst[categorical_cols])
enc = OneHotEncoder(sparse=False)
X = pd.concat((X[numerical_cols],pd.DataFrame(enc.fit_transform(X[categorical_cols])).set_index(X.index)),axis=1)
X_tst = pd.concat((X_tst[numerical_cols], pd.DataFrame(enc.transform(X_tst[categorical_cols])).set_index(X_tst.index)),axis=1)

if dataset not in ["yeast", "scene"]:
le = LabelEncoder()
Expand All @@ -128,5 +125,5 @@ def load_and_transform_data(dataset, doFeatureBinarizer=True, doOrdinalEncode=Fa

if __name__ == "__main__":
dataset = "yeast"
# adult, magic, house, heloc, mushroom, chess, ads, nursery, car, pageblocks, pendigits, contraceptivemc, drive, covtype, yeast, scene
# adult, magic, house, heloc, mushroom, chess, ads, nursery, car, pageblocks, pendigits, contraceptivemc, drive, yeast, scene
X, Y, X_tst, Y_tst = load_and_transform_data(dataset, doFeatureBinarizer=True, doOrdinalEncode=False)
1 change: 0 additions & 1 deletion networkTorch_multiClass.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,6 @@ def train(model, X, Y, X_val, Y_val, batch_size, nbOutput, learning_rate=1e-2, l
acc = accuracy_score(y, y_predr)
print('init acc', acc)
print_rules(model)
print(model.and_layer.loc[0])
best_loss = sys.maxsize
best_epoch = -1
best_w = None
Expand Down
2 changes: 1 addition & 1 deletion train_and_test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,5 +68,5 @@ def worker(name):


name = "mushroom"
#["adult", "magic", "house", "heloc", "mushroom", "chess", "ads", "nursery", "car", "pageblocks", "pendigits", "contraceptivemc", "drive", "covtype"]
#["adult", "magic", "house", "heloc", "mushroom", "chess", "ads", "nursery", "car", "pageblocks", "pendigits", "contraceptivemc", "drive"]
worker(name)

0 comments on commit 2985632

Please sign in to comment.