Merge pull request #77 from Mayur1009/feat_multioutput

Coalesced Multioutput classifier
cair · Nov 27, 2024 · 86b1a20 · 86b1a20
2 parents 79cb1c9 + 34eb9c7
commit 86b1a20
Show file tree

Hide file tree

Showing 9 changed files with 683 additions and 8 deletions.
diff --git a/.gitignore b/.gitignore
@@ -136,3 +136,5 @@ wheelhouse/
 
 cmake-build-debug
 cmake-build-release
+
+.envrc
diff --git a/examples/experimental/classification/FashionMNISTMultioutput.py b/examples/experimental/classification/FashionMNISTMultioutput.py
@@ -0,0 +1,130 @@
+import argparse
+from pprint import pprint
+
+import numpy as np
+from sklearn.datasets import fetch_openml
+from sklearn.metrics import accuracy_score, classification_report, f1_score, hamming_loss, precision_score, recall_score
+from sklearn.model_selection import train_test_split
+from tmu.experimental.models.multioutput_classifier import TMCoalesceMultiOuputClassifier
+
+
+def _bucketize_pixels(images, ch):
+    """Encode pixel intensities as ``ch`` binary channels, one per intensity bucket.
+
+    Channel ``j`` is 1 where ``(j + 1) * 255 / (ch + 1) <= pixel < (j + 2) * 255 / (ch + 1)``
+    and 0 elsewhere. The result has shape ``(*images.shape, ch)``.
+    """
+    encoded = np.zeros((*images.shape, ch))
+    for j in range(ch):
+        lower = (j + 1) * 255 / (ch + 1)
+        upper = (j + 2) * 255 / (ch + 1)
+        # NOTE(review): the last bucket's upper bound is exactly 255 and the
+        # comparison is strict, so a pixel value of 255 activates no channel
+        # (same encoding as pixels below the first threshold) -- confirm intended.
+        encoded[..., j] = np.logical_and(images >= lower, images < upper) & 1
+    return encoded
+
+
+def _one_hot(labels, n_classes):
+    """Return an ``(len(labels), n_classes)`` int matrix with a single 1 per row."""
+    encoded = np.zeros((labels.shape[0], n_classes), dtype=int)
+    encoded[np.arange(labels.shape[0]), labels] = 1
+    return encoded
+
+
+def dataset_fmnist_ch(ch=8):
+    """Load Fashion-MNIST from OpenML and prepare it for multi-output training.
+
+    Images are reshaped to 28x28 and expanded into ``ch`` binary intensity
+    channels; labels are one-hot encoded over 10 classes. The last 10000
+    samples of a seeded (``random_state=0``) shuffle split form the test set.
+
+    Args:
+        ch: number of binary intensity channels per pixel.
+
+    Returns:
+        ``(original, xtrain, ytrain, xtest, ytest, label_names)`` where
+        ``original`` is ``(xtrain_orig, ytrain_orig, xtest_orig, ytest_orig)``
+        (flat pixel rows and integer labels) and ``label_names`` lists the
+        10 class names in label order.
+    """
+    X, y = fetch_openml(
+        "Fashion-MNIST",
+        version=1,
+        return_X_y=True,
+        as_frame=False,
+    )
+
+    xtrain_orig, xtest_orig, ytrain_orig, ytest_orig = train_test_split(X, y, random_state=0, test_size=10000)
+    ytrain_orig = np.array(ytrain_orig, dtype=int)
+    ytest_orig = np.array(ytest_orig, dtype=int)
+
+    xtrain = _bucketize_pixels(np.array(xtrain_orig).reshape(-1, 28, 28), ch)
+    xtest = _bucketize_pixels(np.array(xtest_orig).reshape(-1, 28, 28), ch)
+
+    ytrain = _one_hot(ytrain_orig, 10)
+    ytest = _one_hot(ytest_orig, 10)
+
+    label_names = [
+        "tshirt",
+        "trouser",
+        "pullover",
+        "dress",
+        "coat",
+        "sandal",
+        "shirt",
+        "sneaker",
+        "bag",
+        "ankleboot",
+    ]
+    original = (xtrain_orig, ytrain_orig, xtest_orig, ytest_orig)
+    return original, xtrain, ytrain, xtest, ytest, label_names
+
+
+def arr_div(a, b):
+    """Divide ``a`` by ``b`` element-wise as float32, yielding 0 wherever ``b`` is 0."""
+    quotient = np.zeros_like(a, dtype=np.float32)
+    divisor_nonzero = b != 0
+    # Positions masked out by ``where`` keep the zero they were initialised with.
+    np.divide(a, b, out=quotient, where=divisor_nonzero)
+    return quotient
+
+
+def metrics(true, pred):
+    """Compute example-based multi-label metrics from two indicator matrices.
+
+    Per-sample scores are averaged over samples; a sample whose denominator
+    is zero contributes 0 (see ``arr_div``).
+
+    Args:
+        true: 2-D ground-truth indicator array, indexed ``[sample, label]``.
+        pred: predicted indicator array of the same shape.
+
+    Returns:
+        dict with keys "Hamming loss", "Accuracy" (mean |true & pred| / |true | pred|),
+        "Precision" (mean |true & pred| / |pred|), "Recall" (mean |true & pred| / |true|)
+        and "F1 score" (harmonic mean of the averaged precision and recall).
+    """
+    land = np.logical_and(true, pred)
+    lor = np.logical_or(true, pred)
+    lxor = np.logical_xor(true, pred)  # symmetric diff
+    n_correct_labels = np.sum(land, axis=1)
+    total_active_labels = np.sum(lor, axis=1)
+    n_miss_preds = np.sum(lxor, axis=1)
+
+    acc = np.mean(arr_div(n_correct_labels, total_active_labels))
+    pre = np.mean(arr_div(n_correct_labels, np.sum(pred, axis=1)))
+    rec = np.mean(arr_div(n_correct_labels, np.sum(true, axis=1)))
+    # With no correct label at all, precision and recall are both 0 and the
+    # harmonic mean would be 0/0 (NaN plus a RuntimeWarning); report 0 instead.
+    pre_plus_rec = pre + rec
+    f1s = 2 * pre * rec / pre_plus_rec if pre_plus_rec > 0 else 0.0
+    hml = np.sum(n_miss_preds) / (true.shape[0] * true.shape[1])
+
+    return {
+        "Hamming loss": hml,
+        "Accuracy": acc,
+        "Precision": pre,
+        "Recall": rec,
+        "F1 score": f1s,
+    }
+
+
+if __name__ == "__main__":
+    # Hyperparameters are taken from the command line and forwarded unchanged
+    # to the classifier constructor.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--clauses", default=2000, type=int)
+    parser.add_argument("--T", default=3125, type=int)
+    parser.add_argument("--s", default=10.0, type=float)
+    parser.add_argument("--q", default=-1, type=float)
+    parser.add_argument("--type_ratio", default=1.0, type=float)
+    parser.add_argument("--platform", default="GPU", type=str)
+    parser.add_argument("--epochs", default=1, type=int)
+    parser.add_argument("--patch", default=10, type=int)
+    args = parser.parse_args()
+
+    params = dict(
+        number_of_clauses=args.clauses,
+        T=args.T,
+        s=args.s,
+        q=args.q,
+        type_i_ii_ratio=args.type_ratio,
+        # A single --patch value gives a square patch.
+        patch_dim=(args.patch, args.patch),
+        platform=args.platform,
+        seed=10,
+    )
+
+    original, xtrain, ytrain, xtest, ytest, label_names = dataset_fmnist_ch()
+
+    tm = TMCoalesceMultiOuputClassifier(**params)
+
+    print("Training with params: ")
+    pprint(params)
+
+    # Count epochs from 1 so the last progress line reads "Epoch N/N"
+    # rather than stopping at "Epoch N-1/N".
+    for epoch in range(1, args.epochs + 1):
+        print(f"Epoch {epoch}/{args.epochs}")
+        tm.fit(xtrain, ytrain, progress_bar=True)
+        pred = tm.predict(xtest, progress_bar=True)
+
+        # Evaluate on the held-out test set after every epoch.
+        met = metrics(ytest, pred)
+        rep = classification_report(ytest, pred, target_names=label_names)
+
+        pprint(met)
+        print(rep)
+        print("------------------------------")
diff --git a/examples/experimental/classification/MNISTMultioutput.py b/examples/experimental/classification/MNISTMultioutput.py
@@ -0,0 +1,83 @@
+import argparse
+from pprint import pprint
+
+import numpy as np
+from sklearn.metrics import accuracy_score, classification_report, f1_score, hamming_loss, precision_score, recall_score
+from tmu.data import MNIST
+from tmu.experimental.models.multioutput_classifier import TMCoalesceMultiOuputClassifier
+
+
+def dataset_mnist():
+    """Load MNIST via ``tmu.data.MNIST`` and prepare it for multi-output training.
+
+    Images are reshaped to 28x28 and labels are one-hot encoded over 10 classes.
+
+    Returns:
+        ``(original, xtrain, ytrain, xtest, ytest, label_names)`` where
+        ``original`` is ``(xtrain_orig, ytrain_orig, xtest_orig, ytest_orig)``
+        exactly as delivered by the loader and ``label_names`` is "0".."9".
+    """
+    splits = MNIST().get()
+    xtrain_orig = splits["x_train"]
+    xtest_orig = splits["x_test"]
+    ytrain_orig = splits["y_train"]
+    ytest_orig = splits["y_test"]
+
+    xtrain = xtrain_orig.reshape(-1, 28, 28)
+    xtest = xtest_orig.reshape(-1, 28, 28)
+
+    # One row per sample with a single 1 in the column of its digit.
+    one_hot = []
+    for labels in (ytrain_orig, ytest_orig):
+        encoded = np.zeros((labels.shape[0], 10), dtype=int)
+        for row in range(labels.shape[0]):
+            encoded[row, labels[row]] = 1
+        one_hot.append(encoded)
+    ytrain, ytest = one_hot
+
+    label_names = [str(digit) for digit in range(10)]
+    original = (xtrain_orig, ytrain_orig, xtest_orig, ytest_orig)
+    return original, xtrain, ytrain, xtest, ytest, label_names
+
+def metrics(true, pred):
+    """Summarise multi-label predictions with scikit-learn metrics.
+
+    Returns a dict holding subset accuracy, Hamming loss, and the
+    support-weighted F1, precision and recall of ``pred`` against ``true``.
+    """
+    averaging = "weighted"
+    return {
+        "Subset accuracy": accuracy_score(true, pred),
+        "Hamming loss": hamming_loss(true, pred),
+        "F1 score": f1_score(true, pred, average=averaging),
+        "Precision": precision_score(true, pred, average=averaging),
+        "Recall": recall_score(true, pred, average=averaging),
+    }
+
+if __name__ == "__main__":
+    # Hyperparameters are taken from the command line and forwarded unchanged
+    # to the classifier constructor.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--clauses", default=2000, type=int)
+    parser.add_argument("--T", default=3125, type=int)
+    parser.add_argument("--s", default=10.0, type=float)
+    parser.add_argument("--q", default=-1, type=float)
+    parser.add_argument("--type_ratio", default=1.0, type=float)
+    parser.add_argument("--platform", default="GPU", type=str)
+    parser.add_argument("--epochs", default=1, type=int)
+    parser.add_argument("--patch", default=10, type=int)
+    args = parser.parse_args()
+
+    params = dict(
+        number_of_clauses=args.clauses,
+        T=args.T,
+        s=args.s,
+        q=args.q,
+        type_i_ii_ratio=args.type_ratio,
+        # A single --patch value gives a square patch.
+        patch_dim=(args.patch, args.patch),
+        platform=args.platform,
+        seed=10,
+    )
+
+    original, xtrain, ytrain, xtest, ytest, label_names = dataset_mnist()
+
+    tm = TMCoalesceMultiOuputClassifier(**params)
+
+    print("Training with params: ")
+    pprint(params)
+
+    # Count epochs from 1 so the last progress line reads "Epoch N/N"
+    # rather than stopping at "Epoch N-1/N".
+    for epoch in range(1, args.epochs + 1):
+        print(f"Epoch {epoch}/{args.epochs}")
+        tm.fit(xtrain, ytrain, progress_bar=True)
+        pred = tm.predict(xtest, progress_bar=True)
+
+        # Evaluate on the held-out test set after every epoch.
+        met = metrics(ytest, pred)
+        rep = classification_report(ytest, pred, target_names=label_names)
+
+        pprint(met)
+        print(rep)
+        print("------------------------------")
diff --git a/pyproject.toml b/pyproject.toml
@@ -51,6 +51,8 @@ tests = [
 homepage = "https://github.com/cair/tmu/"
 repository = "https://github.com/cair/tmu/"
 
+[tool.basedpyright]
+typeCheckingMode = "standard"
 
 [tool.setuptools.package-dir]
 tmu = "tmu"

diff --git a/tmu/clause_bank/base_clause_bank.py b/tmu/clause_bank/base_clause_bank.py
@@ -91,3 +91,9 @@ def get_ta_action(self, clause, ta):
         pos = int(
             clause * self.number_of_ta_chunks * self.number_of_state_bits_ta + ta_chunk * self.number_of_state_bits_ta + self.number_of_state_bits_ta - 1)
         return (self.clause_bank[pos] & (1 << chunk_pos)) > 0
+
+    def get_literals(self):
+        """Collect the action of every TA into an int8 matrix.
+
+        The result has one row per clause and one column per literal; entry
+        ``[i, j]`` is 1 when ``self.get_ta_action(i, j)`` is true, else 0.
+        """
+        actions = []
+        for clause in range(self.number_of_clauses):
+            row = [self.get_ta_action(clause, ta) for ta in range(self.number_of_literals)]
+            actions.append(row)
+        return np.array(actions).astype(np.int8)
+
diff --git a/tmu/clause_bank/clause_bank.py b/tmu/clause_bank/clause_bank.py
@@ -133,7 +133,8 @@ def initialize_clauses(self):
             order="c"
         )
 
-        self.clause_bank[:, :, 0: self.number_of_state_bits_ta - 1] = np.uint32(~0)
+        # np.uint32(~0) converts the out-of-range Python int -1: deprecated since NumPy 1.24 and an OverflowError in NumPy >= 2.0. np.array(~0).astype(np.uint32) wraps to the same all-ones value.
+        self.clause_bank[:, :, 0: self.number_of_state_bits_ta - 1] = np.array(~0).astype(np.uint32)
         self.clause_bank[:, :, self.number_of_state_bits_ta - 1] = 0
         self.clause_bank = np.ascontiguousarray(self.clause_bank.reshape(
             (self.number_of_clauses * self.number_of_ta_chunks * self.number_of_state_bits_ta)))
@@ -142,7 +143,9 @@ def initialize_clauses(self):
 
         self.clause_bank_ind = np.empty(
             (self.number_of_clauses, self.number_of_ta_chunks, self.number_of_state_bits_ind), dtype=np.uint32)
-        self.clause_bank_ind[:, :, :] = np.uint32(~0)
+
+        # np.uint32(~0) converts the out-of-range Python int -1: deprecated since NumPy 1.24 and an OverflowError in NumPy >= 2.0. np.array(~0).astype(np.uint32) wraps to the same all-ones value.
+        self.clause_bank_ind[:, :, :] = np.array(~0).astype(np.uint32)
 
         self.clause_bank_ind = np.ascontiguousarray(self.clause_bank_ind.reshape(
             (self.number_of_clauses * self.number_of_ta_chunks * self.number_of_state_bits_ind)))

diff --git a/tmu/clause_bank/clause_bank_cuda.py b/tmu/clause_bank/clause_bank_cuda.py
@@ -119,6 +119,10 @@ def __init__(
         self.calculate_clause_outputs_update_gpu = mod.get_function("calculate_clause_outputs_update")
         self.calculate_clause_outputs_update_gpu.prepare("PiiiPPPi")
 
+        mod = load_cuda_kernel(parameters, "cuda/calculate_clause_outputs_patchwise.cu")
+        self.calculate_clause_outputs_patchwise_gpu = mod.get_function("calculate_clause_outputs_patchwise")
+        self.calculate_clause_outputs_patchwise_gpu.prepare("PiiiPPi")
+
         mod = load_cuda_kernel(parameters, "cuda/clause_feedback.cu")
         self.type_i_feedback_gpu = mod.get_function("type_i_feedback")
         self.type_i_feedback_gpu.prepare("PPiiiffiiPPPi")
@@ -147,6 +151,7 @@ def __init__(
             dtype=np.uint32,
             order="c"
         )
+        self.clause_output_patchwise_gpu = self._profiler.profile(cuda.mem_alloc, self.clause_output_patchwise.nbytes)
 
         self.clause_active_gpu = self._profiler.profile(cuda.mem_alloc, self.clause_output.nbytes)
         self.literal_active_gpu = self._profiler.profile(cuda.mem_alloc, self.number_of_ta_chunks * 4)
@@ -219,18 +224,23 @@ def calculate_clause_outputs_update(self, literal_active, encoded_X, e):
         return self.clause_output
 
     def calculate_clause_outputs_patchwise(self, encoded_X, e):
-        xi_p = ffi.cast("unsigned int *", Xi.ctypes.data)
-        lib.cb_calculate_clause_outputs_patchwise(
-            self.cb_p,
+        """Run the ``calculate_clause_outputs_patchwise`` CUDA kernel for sample ``e``.
+
+        Launches the prepared kernel on ``self.grid`` / ``self.block``, waits for
+        the CUDA context to finish, then copies ``self.clause_output_patchwise_gpu``
+        back into the host array ``self.clause_output_patchwise``.
+
+        Args:
+            encoded_X: passed to the kernel as a pointer argument (prepare
+                format "PiiiPPi") -- presumably a device allocation holding
+                the encoded samples; confirm against the caller.
+            e: sample index, forwarded to the kernel as ``np.int32``.
+
+        Returns:
+            ``self.clause_output_patchwise`` after the device-to-host copy.
+        """
+
+        self.calculate_clause_outputs_patchwise_gpu.prepared_call(
+            self.grid,
+            self.block,
+            self.clause_bank_gpu,
             self.number_of_clauses,
             self.number_of_literals,
             self.number_of_state_bits_ta,
-            self.number_of_patches,
-            self.cop_p,
-            xi_p
+            self.clause_output_patchwise_gpu,
+            encoded_X,
+            np.int32(e)
         )
+        self.cuda_ctx.synchronize()
+        self._profiler.profile(cuda.memcpy_dtoh, self.clause_output_patchwise, self.clause_output_patchwise_gpu)
         return self.clause_output_patchwise
 
+
     def type_i_feedback(
             self,
             update_p,

diff --git a/tmu/clause_bank/cuda/calculate_clause_outputs_patchwise.cu b/tmu/clause_bank/cuda/calculate_clause_outputs_patchwise.cu
@@ -0,0 +1,67 @@
+/***
+# Copyright (c) 2021 Ole-Christoffer Granmo
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# This code implements the Convolutional Tsetlin Machine from paper arXiv:1905.09688
+# https://arxiv.org/abs/1905.09688
+***/
+#include <curand_kernel.h>
+
+extern "C"
+{
+    // Evaluates one clause against every patch of one sample.
+    // For each patch, output[patch] becomes 1 when every bit set in the clause's
+    // last state-bit word of each chunk (presumably the TA include bits) is also
+    // set in the patch's literal chunk, and 0 otherwise. Only the bits selected
+    // by `filter` are compared in the final chunk.
+    __device__ inline void calculate_clause_output_patchwise(unsigned int *ta_state, int number_of_ta_chunks, int number_of_state_bits, unsigned int filter, unsigned int *output, unsigned int *Xi)
+    {
+        const int last_chunk = number_of_ta_chunks - 1;
+
+        for (int patch = 0; patch < NUMBER_OF_PATCHES; ++patch) {
+            unsigned int *patch_literals = &Xi[patch*number_of_ta_chunks];
+            unsigned int matched = 1;
+
+            // Full chunks: stop at the first chunk that fails to match.
+            for (int k = 0; k < last_chunk && matched; k++) {
+                unsigned int action_bits = ta_state[k*number_of_state_bits + number_of_state_bits-1];
+                matched = (action_bits & patch_literals[k]) == action_bits;
+            }
+
+            // Final chunk: compare only the bits kept by the filter mask.
+            if (matched) {
+                unsigned int action_bits = ta_state[last_chunk*number_of_state_bits + number_of_state_bits-1];
+                matched = (action_bits & patch_literals[last_chunk] & filter) == (action_bits & filter);
+            }
+
+            output[patch] = matched;
+        }
+    }
+
+    // Computes the patch-wise outputs of all clauses for sample `e`.
+    // Clauses are distributed over threads with a grid-stride loop; the outputs
+    // of clause j are written to clause_output[j*NUMBER_OF_PATCHES ...].
+    __global__ void calculate_clause_outputs_patchwise(unsigned int *ta_state, int number_of_clauses, int number_of_literals, int number_of_state_bits, unsigned int *clause_output, unsigned int *X, int e)
+    {
+        const int first_clause = blockIdx.x * blockDim.x + threadIdx.x;
+        const int step = blockDim.x * gridDim.x;
+
+        // Mask for the literals that occupy the last, possibly partial, 32-bit chunk.
+        const int tail_bits = (number_of_literals) % 32;
+        const unsigned int filter = (tail_bits != 0) ? (~(0xffffffff << tail_bits)) : 0xffffffff;
+        const unsigned int number_of_ta_chunks = (number_of_literals-1)/32 + 1;
+
+        // Start of sample e inside X; identical for every clause.
+        unsigned int *sample = &X[e*(number_of_ta_chunks*NUMBER_OF_PATCHES)];
+
+        for (int clause = first_clause; clause < number_of_clauses; clause += step) {
+            unsigned int clause_pos = clause*number_of_ta_chunks*number_of_state_bits;
+            calculate_clause_output_patchwise(&ta_state[clause_pos], number_of_ta_chunks, number_of_state_bits, filter, &clause_output[clause*NUMBER_OF_PATCHES], sample);
+        }
+    }
+}
Original file line number	Diff line number	Diff line change
Expand Up		@@ -136,3 +136,5 @@ wheelhouse/

		cmake-build-debug
		cmake-build-release

		.envrc