init

Tony607 · May 27, 2018 · b415c82 · b415c82
commit b415c82
Show file tree

Hide file tree

Showing 5 changed files with 2,836 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,13 @@
+# ignore the results of running code 'results/'
+results/*/*
+results/*.csv
+!/results/exp1/results.csv
+
+*.pyc
+
+*.hdf5
+*.ipynb_checkpoints
+*.p
+*.h5
+*.HDF5
+__pycache__
diff --git a/Keras-DEC.ipynb b/Keras-DEC.ipynb
diff --git a/README.md b/README.md
@@ -0,0 +1,25 @@
+# [How to do Unsupervised Clustering with Keras](https://www.dlology.com/blog/how-to-do-unsupervised-clustering-with-keras/) | DLology Blog
+
+
+## How to Run
+Requires [Python 3.5+](https://www.python.org/ftp/python/3.6.4/python-3.6.4.exe) and [Jupyter notebook](https://jupyter.readthedocs.io/en/latest/install.html) to be installed.
+### Clone or download this repo
+```
+git clone https://github.com/Tony607/Keras_Deep_Clustering
+```
+### Install required libraries
+`pip3 install -r requirements.txt`
+
+
+In the project directory, open a command line and run:
+```
+jupyter notebook
+```
+In the browser window that opens, open:
+```
+Keras-DEC.ipynb
+```
+If you want to skip the training, you can use the pre-trained weights from the releases: [results.zip](https://github.com/Tony607/Keras_Deep_Clustering/releases/download/V0.1/results.zip). Extract the
+`results` folder to the root of the project directory.
+
+Happy coding! Leave a comment if you have any questions.
diff --git a/metrics.py b/metrics.py
@@ -0,0 +1,27 @@
+import numpy as np
+from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score
+
+nmi = normalized_mutual_info_score
+ari = adjusted_rand_score
+
+
+def acc(y_true, y_pred):
+    """
+    Calculate clustering accuracy under the best one-to-one label mapping.
+
+    Predicted cluster ids are matched to true labels with the assignment
+    that maximizes the number of agreeing samples; the accuracy is the
+    fraction of samples that agree under that mapping. Requires SciPy.
+
+    # Arguments
+        y_true: true labels, numpy.array with shape `(n_samples,)`
+        y_pred: predicted labels, numpy.array with shape `(n_samples,)`
+
+    Labels are used as array indices, so they must be non-negative
+    integers (or values that cast cleanly to them).
+
+    # Return
+        accuracy, in [0,1]
+
+    # Raises
+        ValueError: if the inputs are empty or differ in size
+    """
+    # `sklearn.utils.linear_assignment_` was removed from scikit-learn;
+    # SciPy provides the equivalent solver.
+    from scipy.optimize import linear_sum_assignment
+
+    # Cast both label arrays: they are used as indices below.
+    y_true = np.asarray(y_true).astype(np.int64)
+    y_pred = np.asarray(y_pred).astype(np.int64)
+    if y_pred.size != y_true.size:
+        raise ValueError(
+            "y_true and y_pred must have the same size, got %d and %d"
+            % (y_true.size, y_pred.size))
+    if y_pred.size == 0:
+        raise ValueError("y_true and y_pred must not be empty")
+
+    # Square contingency table: w[p, t] counts samples predicted as
+    # cluster p whose true label is t.
+    D = max(y_pred.max(), y_true.max()) + 1
+    w = np.zeros((D, D), dtype=np.int64)
+    # np.add.at accumulates correctly when an index pair repeats.
+    np.add.at(w, (y_pred, y_true), 1)
+
+    # The solver minimizes cost, so flip counts into costs to maximize
+    # the total number of matched samples.
+    row_ind, col_ind = linear_sum_assignment(w.max() - w)
+    return w[row_ind, col_ind].sum() * 1.0 / y_pred.size
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,7 @@
+numpy
+tensorflow
+keras
+scipy
+matplotlib
+sklearn
+seaborn