Commit 37dc87a

Fix a few things
1 parent a5a95f4 commit 37dc87a

8 files changed: +117 -126 lines changed

.readthedocs.yaml (-1)

@@ -5,7 +5,6 @@
 # Required
 version: 2
 
-
 # Set the OS, Python version and other tools you might need
 build:
   os: ubuntu-22.04

README.md (+1)

@@ -1,4 +1,5 @@
 [![PyPI version](https://badge.fury.io/py/xcolumns.svg)](https://badge.fury.io/py/xcolumns)
+[![Documentation Status](https://readthedocs.org/projects/xcolumns/badge/?version=latest)](https://xcolumns.readthedocs.io/en/latest/?badge=latest)
 [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://pre-commit.com/)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 

docs/conf.py (+3)

@@ -14,6 +14,9 @@
 import sys
 
 
+sys.path.insert(0, os.path.abspath(".."))
+
+
 # -- Project information -----------------------------------------------------
 
 project = "xCOLUMNs"
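
The inserted line uses `os`, which is presumably already imported above this hunk (only `import sys` is visible in the context). A minimal sketch of the resulting path-setup block, assuming the standard Sphinx layout with conf.py in docs/ one level below the package root:

    import os
    import sys

    # Make the xcolumns package importable for Sphinx autodoc by putting
    # the repository root (one level above docs/) on the module search path.
    sys.path.insert(0, os.path.abspath(".."))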

docs/intro/overview.md (+1 -1)

@@ -36,7 +36,7 @@ macro-averaged F-score, Jaccard score, macro-averged precision and recall at k e
 So far, all the algorithms implemented in xCOLUMNs require providing the definition of the target function and marginal conditional probabilities (or their estimates) of labels - $P(y_{j} = 1 | x)$
 for each label $j$ and each instance $x$ resulting in matrix that we denote as $H$ (large $\eta$) and in the names as **`y_proba`** argument of different methods.
 The estimates of label probabilities can be easily obtained from many different classification models, such as logistic regression, neural networks, etc.
-In this sense xCOLUMNs implements plug-in inference methods, that can be used on top of any probablity estimatation model.
+In this sense xCOLUMNs implements plug-in inference methods, that can be used on top of any probability estimatation model.
 
 
 ## What methods are implemented in xCOLUMNs?
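
As the changed paragraph says, any probabilistic classifier can supply `y_proba`. A hedged sketch of building such a matrix with scikit-learn (illustrative only; scikit-learn and the names X_train, Y_train, X_test are assumptions, not part of this commit):

    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from sklearn.multioutput import MultiOutputClassifier

    # One probabilistic binary classifier per label; Y_train is an
    # (n_samples, n_labels) binary indicator matrix.
    clf = MultiOutputClassifier(LogisticRegression(max_iter=1000)).fit(X_train, Y_train)

    # Stack the per-label P(y_j = 1 | x) columns into the matrix H that
    # xCOLUMNs methods take as their y_proba argument.
    y_proba = np.column_stack([p[:, 1] for p in clf.predict_proba(X_test)])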

experiments/run_neurips_2023_bca_experiment.py (+11 -11)

@@ -45,7 +45,7 @@
     # Block coordinate with default parameters - commented out because it better to use variatns with specific tolerance to stopping condition
     # "block-coord-macro-prec": (predict_optimizing_macro_precision_using_bc, {}),
     # "block-coord-macro-recall": (predict_optimizing_macro_recall_using_bc, {}),
-    # "block-coord-macro-f1": (predict_optimizing_macro_fmeasure_using_bc, {}),
+    # "block-coord-macro-f1": (predict_optimizing_macro_f1_score_using_bc, {}),
     # "block-coord-cov": (predict_optimizing_coverage_using_bc, {}),
     #
     # Tolerance on stopping condiction experiments
@@ -93,23 +93,23 @@
     ),
     #
     "block-coord-macro-f1-tol=1e-3": (
-        predict_optimizing_macro_fmeasure_using_bc,
+        predict_optimizing_macro_f1_score_using_bc,
         {"tolerance": 1e-3},
     ),
     "block-coord-macro-f1-tol=1e-4": (
-        predict_optimizing_macro_fmeasure_using_bc,
+        predict_optimizing_macro_f1_score_using_bc,
         {"tolerance": 1e-4},
     ),
     "block-coord-macro-f1-tol=1e-5": (
-        predict_optimizing_macro_fmeasure_using_bc,
+        predict_optimizing_macro_f1_score_using_bc,
         {"tolerance": 1e-5},
     ),
     "block-coord-macro-f1-tol=1e-6": (
-        predict_optimizing_macro_fmeasure_using_bc,
+        predict_optimizing_macro_f1_score_using_bc,
         {"tolerance": 1e-6},
     ),
     "block-coord-macro-f1-tol=1e-7": (
-        predict_optimizing_macro_fmeasure_using_bc,
+        predict_optimizing_macro_f1_score_using_bc,
         {"tolerance": 1e-7},
     ),
     #
@@ -143,7 +143,7 @@
         {"init_y_pred": "greedy", "max_iter": 1},
     ),
     "greedy-macro-f1": (
-        predict_optimizing_macro_fmeasure_using_bc,
+        predict_optimizing_macro_f1_score_using_bc,
         {"init_y_pred": "greedy", "max_iter": 1},
     ),
     "greedy-cov": (
@@ -160,15 +160,15 @@
         {"max_iter": 1},
     ),
     "block-coord-macro-f1-iter=1": (
-        predict_optimizing_macro_fmeasure_using_bc,
+        predict_optimizing_macro_f1_score_using_bc,
         {"max_iter": 1},
     ),
     "block-coord-cov-iter=1": (predict_optimizing_coverage_using_bc, {"max_iter": 1}),
     #
     # Similar results to the above
     # "greedy-start-block-coord-macro-prec": (predict_optimizing_macro_precision_using_bc, {"init_y_pred": "greedy"},),
-    # "greedy-start-block-coord-macro-recall": (predict_optimizing_macro_fmeasure_using_bc, {"init_y_pred": "greedy"}),
-    # "greedy-start-block-coord-macro-f1": (predict_optimizing_macro_fmeasure_using_bc, {"init_y_pred": "greedy"}),
+    # "greedy-start-block-coord-macro-recall": (predict_optimizing_macro_f1_score_using_bc, {"init_y_pred": "greedy"}),
+    # "greedy-start-block-coord-macro-f1": (predict_optimizing_macro_f1_score_using_bc, {"init_y_pred": "greedy"}),
     # "greedy-start-block-coord-cov": (predict_optimizing_coverage_using_bc, {"init_y_pred": "greedy"}),
 }
 
@@ -193,7 +193,7 @@
 for alpha in alphas:
     pass
     METHODS[f"block-coord-mixed-prec-f1-alpha={alpha}-tol=1e-6"] = (
-        predict_optimizing_mixed_instance_precision_and_macro_fmeasure_using_bc,
+        predict_optimizing_mixed_instance_precision_and_macro_f1_score_using_bc,
         {"alpha": alpha, "tolerance": 1e-6},
     )
     METHODS[f"block-coord-mixed-prec-prec-alpha={alpha}-tol=1e-6"] = (

short_usage_guide.ipynb (+91 -13)

@@ -47,7 +47,7 @@
     "So far, all the algorithms implemented in xCOLUMNs require providing the definition of the target function and marginal conditional probabilities (or their estimates) of labels - $P(y_{j} = 1 | x)$ \n",
     "for each label $j$ and each instance $x$ resulting in matrix that we denote as $H$ (large $\\eta$) and in the names as **`y_proba`** argument of different methods. \n",
     "The estimates of label probabilities can be easily obtained from many different classification models, such as logistic regression, neural networks, etc. \n",
-    "In this sense xCOLUMNs implements plug-in inference methods, that can be used on top of any probablity estimatation model.\n",
+    "In this sense xCOLUMNs implements plug-in inference methods, that can be used on top of any probability estimatation model.\n",
     "\n",
     "\n",
     "### What methods are implemented in xCOLUMNs?\n",
@@ -69,7 +69,7 @@
     "\n",
     "### Installation\n",
     "\n",
-    "xCOLUMNs can be easly installed using pip:"
+    "xCOLUMNs can be easily installed using pip:"
    ]
   },
   {
@@ -171,14 +171,65 @@
     "    y_proba_val = csr_matrix(y_proba_val)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Defininig target metric\n",
+    "\n",
+    "To use the `xcolumns` library, we need to define a target metric. The metric should be a function of confusion matrices for each label. The confusion matrix for each label is a 2x2 matrix with entries: true positives (TP), false positives (FP), false negatives (FN), and true negatives (TN).\n",
+    "Because of that let's first defina a binary metrics function that use values of tp, fp, fn, tn to calculate the value of the metric for each label. Let's define a simple F1 score metric.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def our_binary_f1_score_on_conf_matrix(tp, fp, fn, tn):\n",
+    "    return 2*tp / (2*tp + fp + fn + 1e-6)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Based on the binary metric function we can define a label-wise metric function that accumulates the values of the binary metric for each label.\n",
+    "Lets define a macro-averaged variant of the F1 score metric."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def our_macro_f1_score_on_conf_matrix(tp, fp, fn, tn):\n",
+    "    return our_binary_f1_score_on_conf_matrix(tp, fp, fn, tn).mean()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "xCOLUMNs already provides implementation of popular metricses, that can be found in `xcolumns.metrics` module."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "## Defininig target metric\n",
-    "\n"
+    "from xcolumns.metrics import binary_f1_score_on_conf_matrix, macro_f1_score_on_conf_matrix"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now let's define simple evaluation function that will calculate the value of the metric for the given true and predicted labels."
    ]
   },
   {
@@ -187,16 +238,27 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from xcolumns.confusion_matrix import calculate_confusion_matrix\n",
-    "from xcolumns.metrics import macro_f1_score_on_conf_matrix\n",
+    "from xcolumns.metrics import calculate_confusion_matrix\n",
     "\n",
     "def evaluate(y_true, y_pred):\n",
     "    \"\"\"\n",
     "    Evaluate the performance of a multioutput classifier.\n",
     "    \"\"\"\n",
     "    C = calculate_confusion_matrix(y_true, y_pred)\n",
-    "    \n",
-    "    print(macro_f1_score_on_conf_matrix(C.tp, C.fp, C.fn, C.tn))\n"
+    "\n",
+    "    # C is ConfusionMatrix object, that contains tp, fp, fn, tn as attributes.\n",
+    "    # C can be unpacked to get the values of tp, fp, fn, tn in this order.\n",
+    "    assert macro_f1_score_on_conf_matrix(C.tp, C.fp, C.fn, C.tn, epsilon=1e-6) == our_macro_f1_score_on_conf_matrix(*C)\n",
+    "    print(our_macro_f1_score_on_conf_matrix(*C))\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Simple baseline\n",
+    "\n",
+    "Now let's use the simples inference strategy to predict the 3 labels with largest probability estimates for each instance. We can do that using `predict_top_k` function from `xcolumns.weighted_prediction` module. That implements the simple inference strategy based on the weighting the estimated probabilities of labels."
    ]
   },
   {
@@ -217,21 +279,31 @@
    "metadata": {},
    "source": [
     "### Prediction using Block Coordnate Ascent/Descent method for a given test set\n",
-    "\n"
+    "\n",
+    "Now we can use Block Coordinate Ascent method to find better prediction that maximizes the macro F1 score.\n",
+    "To do that we can use `predict_using_bc_with_0approx` function from `xcolumns.block_coordinate` module.\n",
+    "It requires the matrix of estimated probabilities of labels for each instance, the number of labels to predict for each instance (use 0 if you don't want to use constraint budget), and the target metric function and direction of optimization (maximize or minimize, default is maximize)."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "from xcolumns.block_coordinate import predict_using_bc_with_0approx\n",
+    "\n",
+    "y_pred = predict_using_bc_with_0approx(y_proba_test, our_binary_f1_score_on_conf_matrix, k=3, maximize=True, seed=seed, verbose=True)\n",
+    "evaluate(y_test, y_pred)"
+   ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Finding optimal population classifier using Frank-Wolfe method"
+    "### Finding optimal population classifier using Frank-Wolfe method\n",
+    "\n",
+    "`predict_using_bc_with_0approx` requires the whole matrix of estimated probabilities of labels for each instance. In some cases, one may prefer to have a population classifier that can be used to predict labels for new instances independently. In such cases, one can use the Frank-Wolfe method to find the optimal population classifier. The method requires training set (preferebly other then the training set used to train the probality estimator), the metric (support any function of ConfusionMatrix) and the number of labels to predict for each instance (use 0 if you don't want to use constraint budget)."
    ]
   },
   {
@@ -244,13 +316,19 @@
     "from xcolumns.frank_wolfe import find_classifier_using_fw\n",
     "\n",
     "rnd_clf, meta = find_classifier_using_fw(\n",
-    "    y_val, y_proba_val, macro_f1_score_on_conf_matrix, k = 3, return_meta=True, seed=seed\n",
+    "    y_val, y_proba_val, our_macro_f1_score_on_conf_matrix, k = 3, return_meta=True, seed=seed, verbose=True\n",
     ")\n",
     "\n",
     "y_pred = rnd_clf.predict(y_proba_test, seed=seed)\n",
-    "print(meta[\"time\"])\n",
     "evaluate(y_test, y_pred)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
 "metadata": {
