Commit 37dc87a

Fix a few things
1 parent a5a95f4 commit 37dc87a

8 files changed: +117 -126 lines changed

.readthedocs.yaml (-1)

@@ -5,7 +5,6 @@
 # Required
 version: 2
 
-
 # Set the OS, Python version and other tools you might need
 build:
   os: ubuntu-22.04

README.md (+1)

@@ -1,4 +1,5 @@
 [![PyPI version](https://badge.fury.io/py/xcolumns.svg)](https://badge.fury.io/py/xcolumns)
+[![Documentation Status](https://readthedocs.org/projects/xcolumns/badge/?version=latest)](https://xcolumns.readthedocs.io/en/latest/?badge=latest)
 [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://pre-commit.com/)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 

docs/conf.py (+3)

@@ -14,6 +14,9 @@
 import sys
 
 
+sys.path.insert(0, os.path.abspath(".."))
+
+
 # -- Project information -----------------------------------------------------
 
 project = "xCOLUMNs"
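
The inserted line uses `os`, which is presumably already imported above this hunk (only `import sys` is visible in the context). A minimal sketch of the resulting path-setup block, assuming the standard Sphinx layout with conf.py in docs/ one level below the package root:

    import os
    import sys

    # Make the xcolumns package importable for Sphinx autodoc by putting
    # the repository root (one level above docs/) on the module search path.
    sys.path.insert(0, os.path.abspath(".."))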

docs/intro/overview.md (+1 -1)

@@ -36,7 +36,7 @@ macro-averaged F-score, Jaccard score, macro-averged precision and recall at k e
 So far, all the algorithms implemented in xCOLUMNs require providing the definition of the target function and marginal conditional probabilities (or their estimates) of labels - $P(y_{j} = 1 | x)$
 for each label $j$ and each instance $x$ resulting in matrix that we denote as $H$ (large $\eta$) and in the names as **`y_proba`** argument of different methods.
 The estimates of label probabilities can be easily obtained from many different classification models, such as logistic regression, neural networks, etc.
-In this sense xCOLUMNs implements plug-in inference methods, that can be used on top of any probablity estimatation model.
+In this sense xCOLUMNs implements plug-in inference methods, that can be used on top of any probability estimatation model.
 
 
 ## What methods are implemented in xCOLUMNs?
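
As the changed paragraph says, any probabilistic classifier can supply `y_proba`. A hedged sketch of building such a matrix with scikit-learn (illustrative only; scikit-learn and the names X_train, Y_train, X_test are assumptions, not part of this commit):

    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from sklearn.multioutput import MultiOutputClassifier

    # One probabilistic binary classifier per label; Y_train is an
    # (n_samples, n_labels) binary indicator matrix.
    clf = MultiOutputClassifier(LogisticRegression(max_iter=1000)).fit(X_train, Y_train)

    # Stack the per-label P(y_j = 1 | x) columns into the matrix H that
    # xCOLUMNs methods take as their y_proba argument.
    y_proba = np.column_stack([p[:, 1] for p in clf.predict_proba(X_test)])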

experiments/run_neurips_2023_bca_experiment.py (+11 -11)

@@ -45,7 +45,7 @@
     # Block coordinate with default parameters - commented out because it better to use variatns with specific tolerance to stopping condition
     # "block-coord-macro-prec": (predict_optimizing_macro_precision_using_bc, {}),
     # "block-coord-macro-recall": (predict_optimizing_macro_recall_using_bc, {}),
-    # "block-coord-macro-f1": (predict_optimizing_macro_fmeasure_using_bc, {}),
+    # "block-coord-macro-f1": (predict_optimizing_macro_f1_score_using_bc, {}),
     # "block-coord-cov": (predict_optimizing_coverage_using_bc, {}),
     #
     # Tolerance on stopping condiction experiments
@@ -93,23 +93,23 @@
     ),
     #
     "block-coord-macro-f1-tol=1e-3": (
-        predict_optimizing_macro_fmeasure_using_bc,
+        predict_optimizing_macro_f1_score_using_bc,
         {"tolerance": 1e-3},
     ),
     "block-coord-macro-f1-tol=1e-4": (
-        predict_optimizing_macro_fmeasure_using_bc,
+        predict_optimizing_macro_f1_score_using_bc,
         {"tolerance": 1e-4},
     ),
     "block-coord-macro-f1-tol=1e-5": (
-        predict_optimizing_macro_fmeasure_using_bc,
+        predict_optimizing_macro_f1_score_using_bc,
         {"tolerance": 1e-5},
     ),
     "block-coord-macro-f1-tol=1e-6": (
-        predict_optimizing_macro_fmeasure_using_bc,
+        predict_optimizing_macro_f1_score_using_bc,
         {"tolerance": 1e-6},
     ),
     "block-coord-macro-f1-tol=1e-7": (
-        predict_optimizing_macro_fmeasure_using_bc,
+        predict_optimizing_macro_f1_score_using_bc,
         {"tolerance": 1e-7},
     ),
     #
@@ -143,7 +143,7 @@
         {"init_y_pred": "greedy", "max_iter": 1},
     ),
     "greedy-macro-f1": (
-        predict_optimizing_macro_fmeasure_using_bc,
+        predict_optimizing_macro_f1_score_using_bc,
         {"init_y_pred": "greedy", "max_iter": 1},
     ),
     "greedy-cov": (
@@ -160,15 +160,15 @@
         {"max_iter": 1},
     ),
     "block-coord-macro-f1-iter=1": (
-        predict_optimizing_macro_fmeasure_using_bc,
+        predict_optimizing_macro_f1_score_using_bc,
         {"max_iter": 1},
     ),
     "block-coord-cov-iter=1": (predict_optimizing_coverage_using_bc, {"max_iter": 1}),
     #
     # Similar results to the above
     # "greedy-start-block-coord-macro-prec": (predict_optimizing_macro_precision_using_bc, {"init_y_pred": "greedy"},),
-    # "greedy-start-block-coord-macro-recall": (predict_optimizing_macro_fmeasure_using_bc, {"init_y_pred": "greedy"}),
-    # "greedy-start-block-coord-macro-f1": (predict_optimizing_macro_fmeasure_using_bc, {"init_y_pred": "greedy"}),
+    # "greedy-start-block-coord-macro-recall": (predict_optimizing_macro_f1_score_using_bc, {"init_y_pred": "greedy"}),
+    # "greedy-start-block-coord-macro-f1": (predict_optimizing_macro_f1_score_using_bc, {"init_y_pred": "greedy"}),
     # "greedy-start-block-coord-cov": (predict_optimizing_coverage_using_bc, {"init_y_pred": "greedy"}),
 }
 
@@ -193,7 +193,7 @@
 for alpha in alphas:
     pass
     METHODS[f"block-coord-mixed-prec-f1-alpha={alpha}-tol=1e-6"] = (
-        predict_optimizing_mixed_instance_precision_and_macro_fmeasure_using_bc,
+        predict_optimizing_mixed_instance_precision_and_macro_f1_score_using_bc,
         {"alpha": alpha, "tolerance": 1e-6},
     )
     METHODS[f"block-coord-mixed-prec-prec-alpha={alpha}-tol=1e-6"] = (

short_usage_guide.ipynb (+91 -13)

@@ -47,7 +47,7 @@
     "So far, all the algorithms implemented in xCOLUMNs require providing the definition of the target function and marginal conditional probabilities (or their estimates) of labels - $P(y_{j} = 1 | x)$ \n",
     "for each label $j$ and each instance $x$ resulting in matrix that we denote as $H$ (large $\\eta$) and in the names as **`y_proba`** argument of different methods. \n",
     "The estimates of label probabilities can be easily obtained from many different classification models, such as logistic regression, neural networks, etc. \n",
-    "In this sense xCOLUMNs implements plug-in inference methods, that can be used on top of any probablity estimatation model.\n",
+    "In this sense xCOLUMNs implements plug-in inference methods, that can be used on top of any probability estimatation model.\n",
     "\n",
     "\n",
     "### What methods are implemented in xCOLUMNs?\n",
@@ -69,7 +69,7 @@
     "\n",
     "### Installation\n",
     "\n",
-    "xCOLUMNs can be easly installed using pip:"
+    "xCOLUMNs can be easily installed using pip:"
    ]
   },
   {
@@ -171,14 +171,65 @@
     "    y_proba_val = csr_matrix(y_proba_val)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Defininig target metric\n",
+    "\n",
+    "To use the `xcolumns` library, we need to define a target metric. The metric should be a function of confusion matrices for each label. The confusion matrix for each label is a 2x2 matrix with entries: true positives (TP), false positives (FP), false negatives (FN), and true negatives (TN).\n",
+    "Because of that let's first defina a binary metrics function that use values of tp, fp, fn, tn to calculate the value of the metric for each label. Let's define a simple F1 score metric.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def our_binary_f1_score_on_conf_matrix(tp, fp, fn, tn):\n",
+    "    return 2*tp / (2*tp + fp + fn + 1e-6)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Based on the binary metric function we can define a label-wise metric function that accumulates the values of the binary metric for each label.\n",
+    "Lets define a macro-averaged variant of the F1 score metric."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def our_macro_f1_score_on_conf_matrix(tp, fp, fn, tn):\n",
+    "    return our_binary_f1_score_on_conf_matrix(tp, fp, fn, tn).mean()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "xCOLUMNs already provides implementation of popular metricses, that can be found in `xcolumns.metrics` module."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "## Defininig target metric\n",
-    "\n"
+    "from xcolumns.metrics import binary_f1_score_on_conf_matrix, macro_f1_score_on_conf_matrix"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now let's define simple evaluation function that will calculate the value of the metric for the given true and predicted labels."
    ]
   },
   {
@@ -187,16 +238,27 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from xcolumns.confusion_matrix import calculate_confusion_matrix\n",
-    "from xcolumns.metrics import macro_f1_score_on_conf_matrix\n",
+    "from xcolumns.metrics import calculate_confusion_matrix\n",
     "\n",
     "def evaluate(y_true, y_pred):\n",
     "    \"\"\"\n",
     "    Evaluate the performance of a multioutput classifier.\n",
     "    \"\"\"\n",
     "    C = calculate_confusion_matrix(y_true, y_pred)\n",
-    "    \n",
-    "    print(macro_f1_score_on_conf_matrix(C.tp, C.fp, C.fn, C.tn))\n"
+    "\n",
+    "    # C is ConfusionMatrix object, that contains tp, fp, fn, tn as attributes.\n",
+    "    # C can be unpacked to get the values of tp, fp, fn, tn in this order.\n",
+    "    assert macro_f1_score_on_conf_matrix(C.tp, C.fp, C.fn, C.tn, epsilon=1e-6) == our_macro_f1_score_on_conf_matrix(*C)\n",
+    "    print(our_macro_f1_score_on_conf_matrix(*C))\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Simple baseline\n",
+    "\n",
+    "Now let's use the simples inference strategy to predict the 3 labels with largest probability estimates for each instance. We can do that using `predict_top_k` function from `xcolumns.weighted_prediction` module. That implements the simple inference strategy based on the weighting the estimated probabilities of labels."
    ]
   },
   {
@@ -217,21 +279,31 @@
    "metadata": {},
    "source": [
     "### Prediction using Block Coordnate Ascent/Descent method for a given test set\n",
-    "\n"
+    "\n",
+    "Now we can use Block Coordinate Ascent method to find better prediction that maximizes the macro F1 score.\n",
+    "To do that we can use `predict_using_bc_with_0approx` function from `xcolumns.block_coordinate` module.\n",
+    "It requires the matrix of estimated probabilities of labels for each instance, the number of labels to predict for each instance (use 0 if you don't want to use constraint budget), and the target metric function and direction of optimization (maximize or minimize, default is maximize)."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "from xcolumns.block_coordinate import predict_using_bc_with_0approx\n",
+    "\n",
+    "y_pred = predict_using_bc_with_0approx(y_proba_test, our_binary_f1_score_on_conf_matrix, k=3, maximize=True, seed=seed, verbose=True)\n",
+    "evaluate(y_test, y_pred)"
+   ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Finding optimal population classifier using Frank-Wolfe method"
+    "### Finding optimal population classifier using Frank-Wolfe method\n",
+    "\n",
+    "`predict_using_bc_with_0approx` requires the whole matrix of estimated probabilities of labels for each instance. In some cases, one may prefer to have a population classifier that can be used to predict labels for new instances independently. In such cases, one can use the Frank-Wolfe method to find the optimal population classifier. The method requires training set (preferebly other then the training set used to train the probality estimator), the metric (support any function of ConfusionMatrix) and the number of labels to predict for each instance (use 0 if you don't want to use constraint budget)."
    ]
   },
   {
@@ -244,13 +316,19 @@
     "from xcolumns.frank_wolfe import find_classifier_using_fw\n",
     "\n",
     "rnd_clf, meta = find_classifier_using_fw(\n",
-    "    y_val, y_proba_val, macro_f1_score_on_conf_matrix, k = 3, return_meta=True, seed=seed\n",
+    "    y_val, y_proba_val, our_macro_f1_score_on_conf_matrix, k = 3, return_meta=True, seed=seed, verbose=True\n",
     ")\n",
     "\n",
     "y_pred = rnd_clf.predict(y_proba_test, seed=seed)\n",
-    "print(meta[\"time\"])\n",
     "evaluate(y_test, y_pred)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
 "metadata": {
