From d6a1dadb9e3c14494277097dce96954ae89dc4f9 Mon Sep 17 00:00:00 2001 From: Dmitry Samborskiy Date: Wed, 4 Oct 2023 18:36:23 -0400 Subject: [PATCH 1/2] Fix two bugs, add transitivity test --- correlation_clustering.ipynb | 37 +++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/correlation_clustering.ipynb b/correlation_clustering.ipynb index 3f252f6..290d076 100644 --- a/correlation_clustering.ipynb +++ b/correlation_clustering.ipynb @@ -36,7 +36,10 @@ "metadata": {}, "outputs": [], "source": [ - "X, y = make_blobs(n_samples=100, centers=5, n_features=2, cluster_std=0.4, random_state=0)" + "X, y = make_blobs(n_samples=100, centers=5, n_features=2, cluster_std=0.4, random_state=0)\n", + "\n", + "# test case for transitivity and unary coding clauses\n", + "# X, y = make_blobs(n_samples=25, centers=5, n_features=2, cluster_std=0.8, random_state=0)" ] }, { @@ -183,12 +186,13 @@ "\n", " # Constraints\n", " f.write('Subject To\\n')\n", - " constraint_i = 0\n", " for i in range(dim-2):\n", " for j in range(i+1, dim-1):\n", " for k in range(j+1, dim):\n", - " constraint_i += 1\n", + " # Note: only one constraint \"x[i,j] & x[j,k] => x[i,k]\" is not enough!\n", " f.write(' x{} + x{} - x{} <= 1\\n'.format(x[i,j], x[j,k], x[i,k]))\n", + " f.write(' x{} + x{} - x{} <= 1\\n'.format(x[i,j], x[i,k], x[j,k]))\n", + " f.write(' x{} + x{} - x{} <= 1\\n'.format(x[i,k], x[j,k], x[i,j]))\n", "\n", " # Binary\n", " f.write('Binary\\n')\n", @@ -560,7 +564,10 @@ "for i in range(dim-2):\n", " for j in range(i+1, dim-1):\n", " for k in range(j+1, dim):\n", + " # Note: only one constraint \"x[i,j] & x[j,k] => x[i,k]\" is not enough (see the test case at the top)\n", " hard_clauses.append('{} -{} -{} {} 0\\n'.format(hard_clause_weight, int_map[i,j], int_map[j,k], int_map[i,k]))\n", + " hard_clauses.append('{} -{} -{} {} 0\\n'.format(hard_clause_weight, int_map[i,j], int_map[i,k], int_map[j,k]))\n", + " hard_clauses.append('{} -{} -{} {} 0\\n'.format(hard_clause_weight, int_map[i,k], int_map[j,k], int_map[i,j]))\n", "\n", "# Soft clauses\n", "soft_clauses= []\n", @@ -673,6 +680,25 @@ " if len(cluster) > 0:\n", " clusters.append(cluster)\n", "\n", + "# make `x` matrix\n", + "x = np.zeros(W.shape, dtype=np.int32)\n", + "for i in range(dim-1):\n", + " for j in range(i+1, dim):\n", + " if inc_dict[int_map[i,j]]:\n", + " x[i,j] = 1\n", + "\n", + "# test transitivity of `x`\n", + "for i in range(dim-2):\n", + " for j in range(i+1, dim-1):\n", + " for k in range(j+1, dim):\n", + " if x[i,j] == 1 and x[j,k] == 1 and x[i,k] == 0:\n", + " print(f\"non-transitive: {i} {j} {k}\")\n", + " if x[i,j] == 1 and x[j,k] == 0 and x[i,k] == 1:\n", + " print(f\"non-transitive: {i} {j} {k}\")\n", + " if x[i,j] == 0 and x[j,k] == 1 and x[i,k] == 1:\n", + " print(f\"non-transitive: {i} {j} {k}\")\n", + "\n", + "\n", "print('Found {} clusters'.format(len(clusters)))" ] }, @@ -784,6 +810,11 @@ " hard_clauses.append('{} -{} {} 0\\n'.format(hard_clause_weight, y[i][k], s[i][k]))\n", " hard_clauses.append('{} -{} {} 0\\n'.format(hard_clause_weight, s[i][k-1], s[i][k]))\n", " hard_clauses.append('{} -{} -{} 0\\n'.format(hard_clause_weight, y[i][k], s[i][k-1]))\n", + " # Also require that sum(y[i][:]) >= 1 (see the test case at the top)\n", + " or_clause = '{}'.format(hard_clause_weight)\n", + " for k in range(K):\n", + " or_clause += ' {}'.format(y[i][k])\n", + " hard_clauses.append(or_clause + ' 0\\n')\n", "\n", "for i in range(dim-1):\n", " for j in range(i+1, dim):\n", From 297fd00539ba27cf69d830e6e041eb05852cd0a0 Mon Sep 17 00:00:00 2001 From: Dmitry Samborskiy Date: Wed, 4 Oct 2023 18:45:24 -0400 Subject: [PATCH 2/2] fix typo --- correlation_clustering.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/correlation_clustering.ipynb b/correlation_clustering.ipynb index 290d076..0713098 100644 --- a/correlation_clustering.ipynb +++ b/correlation_clustering.ipynb @@ -687,7 +687,7 @@ " if inc_dict[int_map[i,j]]:\n", " x[i,j] = 1\n", "\n", - "# test transitivity of `x`\n", + "# test the transitivity of `x`\n", "for i in range(dim-2):\n", " for j in range(i+1, dim-1):\n", " for k in range(j+1, dim):\n",