diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..730bd5c4 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,25 @@ +name: Release + +on: + release: + types: [ published ] + +jobs: + release: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox tox-gh-actions + - name: Run tox + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + python -m tox -e release diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000..f7096868 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,27 @@ +name: Tests + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.7, 3.8, 3.9] + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox tox-gh-actions + - name: Run tox with Python ${{ matrix.python-version }} + run: | + python -m tox diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..6adefab6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,20 @@ +# Ignore all +* + +# Unignore dirs +!*/ + +# Unignore specific files without extensions +!AUTHORS +!LICENSE +!py.typed +!.gitignore + +# Unignore useful extensions +!*.in +!*.ini +!*.md +!*.py +!*.pyi +!*.toml +!*.yml diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 00000000..36b7ef88 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,6 @@ +The following authors have created the source code of "crowd-kit" published and distributed by YANDEX LLC as the owner: + +Dmitry Ustalov dustalov@yandex-team.ru +Evgeny Tulin tulinev@yandex-team.ru +Nikita Pavlichenko pavlichenko@yandex-team.ru +Vladimir Losev losev@yandex-team.ru diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..85f6d25a --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,35 @@ +# Notice to external contributors + + +## General info + +Hello! In order for us (YANDEX LLC) to accept patches and other contributions from you, you will have to adopt our Yandex Contributor License Agreement (the “**CLA**”). The current version of the CLA can be found here: +1) https://yandex.ru/legal/cla/?lang=en (in English) and +2) https://yandex.ru/legal/cla/?lang=ru (in Russian). + +By adopting the CLA, you state the following: + +* You obviously wish and are willingly licensing your contributions to us for our open source projects under the terms of the CLA, +* You have read the terms and conditions of the CLA and agree with them in full, +* You are legally able to provide and license your contributions as stated, +* We may use your contributions for our open source projects and for any other project too, +* We rely on your assurances concerning the rights of third parties in relation to your contributions. + +If you agree with these principles, please read and adopt our CLA. 
By providing us your contributions, you hereby declare that you have already read and adopt our CLA, and we may freely merge your contributions with our corresponding open source project and use it further in accordance with terms and conditions of the CLA.
+
+## Provide contributions
+
+If you have already adopted terms and conditions of the CLA, you are able to provide your contributions. When you submit your first pull request, please add the following information into it:
+
+```
+I hereby agree to the terms of the CLA available at: [link].
+```
+
+Replace the bracketed text as follows:
+* [link] is the link to the current version of the CLA: https://yandex.ru/legal/cla/?lang=en (in English) or https://yandex.ru/legal/cla/?lang=ru (in Russian).
+
+It is enough to provide this notification only once.
+
+## Other questions
+
+If you have any questions, please mail us at opensource@yandex-team.ru.
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..6a9f165d
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,13 @@
+Copyright 2020 YANDEX LLC
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 00000000..622d6484
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,6 @@
+# Legal
+include LICENSE AUTHORS CONTRIBUTING.md
+
+# Stubs
+recursive-include src py.typed
+recursive-include src *.pyi
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..1b09b40d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,27 @@
+# Crowd-kit
+
+[![GitHub Tests][github_tests_badge]][github_tests_link]
+
+[github_tests_badge]: https://github.com/Toloka/crowdlib/workflows/Tests/badge.svg?branch=main
+[github_tests_link]: https://github.com/Toloka/crowdlib/actions?query=workflow:Tests
+
+
+`crowd-kit` is a Python module for crowdsourcing distributed under the Apache-2.0 license. We strive to implement functionality that eases working with crowd-sourced data. Currently, the module contains:
+* Implementations of commonly used aggregation methods
+* A set of metrics
+
+The module is currently under heavy development, and its interfaces are subject to change.
+
+Install
+--------------
+Installing Crowd-kit is as easy as `pip install crowd-kit`
+
+
+Questions and bug reports
+--------------
+To report a bug, please use the [Toloka/bugreport](https://github.com/Toloka/crowdlib/issues) page.
+
+
+License
+-------
+© YANDEX LLC, 2020-2021. Licensed under the Apache License, Version 2.0. See the LICENSE file for more details.
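To make the README concrete, here is a minimal usage sketch (an illustration, not part of the diff). It assumes the `crowdkit.aggregation` package layout configured in `setup.py` below and the `task`/`performer`/`label` input schema used throughout the module; the toy data is invented.

```python
import pandas as pd
from crowdkit.aggregation import MajorityVote

# Three performers label two tasks; task t1 has a 2-to-1 disagreement.
answers = pd.DataFrame({
    'task':      ['t1', 't1', 't1', 't2', 't2', 't2'],
    'performer': ['w1', 'w2', 'w3', 'w1', 'w2', 'w3'],
    'label':     ['cat', 'cat', 'dog', 'dog', 'dog', 'dog'],
})

# fit_predict returns a DataFrame with `task` and `label` columns:
# t1 -> cat, t2 -> dog.
print(MajorityVote().fit_predict(answers))
```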
diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..9787c3bd --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..4132c92d --- /dev/null +++ b/setup.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# coding: utf8 + +from setuptools import setup, find_packages + +PREFIX = 'crowdkit' + +setup( + name='crowd-kit', + package_dir={PREFIX: 'src'}, + packages=[f'{PREFIX}.{package}' for package in find_packages('src')], + version='0.0.1', + description='Python libraries for crowdsourcing', + license='Apache 2.0', + author='Vladimir Losev', + author_email='losev@yandex-team.ru', + python_requires='>=3.7.0', + install_requires=[ + 'attrs', + 'numpy', + 'pandas', + 'tqdm', + 'scikit-learn', + 'nltk', + ], + include_package_data=True, +) diff --git a/src/aggregation/__init__.py b/src/aggregation/__init__.py new file mode 100644 index 00000000..9bbf74d5 --- /dev/null +++ b/src/aggregation/__init__.py @@ -0,0 +1,9 @@ +from .dawid_skene import DawidSkene +from .gold_majority_vote import GoldMajorityVote +from .majority_vote import MajorityVote +from .m_msr import MMSR +from .wawa import Wawa +from .zero_based_skill import ZeroBasedSkill +from .hrrasa import HRRASA, RASA + +__all__ = ['DawidSkene', 'MajorityVote', 'MMSR', 'Wawa', 'GoldMajorityVote', 'ZeroBasedSkill', 'HRRASA', 'RASA'] diff --git a/src/aggregation/annotations.py b/src/aggregation/annotations.py new file mode 100644 index 00000000..b0bccc1f --- /dev/null +++ b/src/aggregation/annotations.py @@ -0,0 +1,124 @@ +""" +This module contains reusable annotations that encapsulate both typing +and description for commonly used parameters. 
These annotations are
+used to automatically generate stub files with proper docstrings.
+"""
+
+import inspect
+import textwrap
+from io import StringIO
+from typing import ClassVar, Dict, Optional, Type, get_type_hints
+
+import attr
+import pandas as pd
+
+
+@attr.s
+class Annotation:
+    type: Optional[Type] = attr.ib(default=None)
+    title: Optional[str] = attr.ib(default=None)
+    description: Optional[str] = attr.ib(default=None)
+
+    def format_google_style_attribute(self, name: str) -> str:
+        type_str = f' ({getattr(self.type, "__name__", str(self.type))})' if self.type else ''
+        title = f' {self.title}\n' if self.title else '\n'
+        description_str = textwrap.indent(f'{self.description}\n', ' ' * 4).lstrip('\n') if self.description else ''
+        return f'{name}{type_str}:{title}{description_str}'
+
+    def format_google_style_return(self):
+        type_str = f'{getattr(self.type, "__name__", str(self.type))}' if self.type else ''
+        title = f' {self.title}\n' if self.title else '\n'
+        description_str = textwrap.indent(f'{self.description}\n', ' ' * 4).lstrip('\n') if self.description else ''
+        return f'{type_str}:{title}{description_str}'
+
+
+def manage_docstring(obj):
+    """Extends the docstring of `obj` with generated Args/Attributes and Returns
+    sections built from its Annotation-typed hints, and replaces those hints with
+    the underlying types."""
+
+    attributes: Dict[str, Annotation] = {}
+    new_annotations = {}
+
+    for key, value in get_type_hints(obj).items():
+        if isinstance(value, Annotation):
+            attributes[key] = value
+            if value.type is not None:
+                new_annotations[key] = value.type
+        else:
+            new_annotations[key] = value
+
+    return_section = attributes.pop('return', None)
+
+    sio = StringIO()
+    sio.write(inspect.cleandoc(obj.__doc__ or ''))
+
+    if attributes:
+        sio.write('\nArgs:\n' if inspect.isfunction(obj) else '\nAttributes:\n')
+        for key, ann in attributes.items():
+            sio.write(textwrap.indent(ann.format_google_style_attribute(key), ' ' * 4))
+
+    if return_section:
+        # The leading newline keeps "Returns:" from fusing with the preceding
+        # text when the cleaned docstring has no trailing newline.
+        sio.write('\nReturns:\n')
+        sio.write(textwrap.indent(return_section.format_google_style_return(), ' ' * 4))
+
+    obj.__annotations__ = new_annotations
+    obj.__doc__ = sio.getvalue()
+    return obj
+
+
+PERFORMERS_SKILLS = Annotation(
+    type=pd.Series,
+    title='Predicted skills for each performer',
+    description=textwrap.dedent("A series of performers' skills indexed by performers"),
+)
+
+PROBAS = Annotation(
+    type=pd.DataFrame,
+    title='Estimated label probabilities',
+    description=textwrap.dedent('''
+        A frame indexed by `task` and a column for every label id found
+        in `data` such that `result.loc[task, label]` is the probability of `task`'s
+        true label to be equal to `label`.
+    '''),
+)
+
+PRIORS = Annotation(
+    type=pd.Series,
+    title='A prior label distribution',
+    description="A series of labels' probabilities indexed by labels",
+)
+
+TASKS_LABELS = Annotation(
+    type=pd.DataFrame,
+    title='Estimated labels',
+    description=textwrap.dedent('''
+        A pandas.DataFrame indexed by `task` with a single column `label` containing
+        `tasks`'s most probable label for last fitted data, or None otherwise.
+    '''),
+)
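+
+# Illustration (not part of the module API): decorating a function whose hints
+# are Annotation instances, e.g.
+#
+#     @manage_docstring
+#     def predict(data: DATA) -> TASKS_LABELS:
+#         """Predicts labels."""
+#
+# appends generated Args/Returns sections to `predict.__doc__` and rewrites
+# `predict.__annotations__` to the underlying pandas types.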
+ '''), +) + +ERRORS = Annotation( + type=pd.DataFrame, + title="Performers' error matrices", + description=textwrap.dedent(''' + A pandas.DataFrame indexed by `performer` and `label` with a column for every + label_id found in `data` such that `result.loc[performer, observed_label, true_label]` + is the probability of `performer` producing an `observed_label` given that a task's + true label is `true_label` + '''), +) + +DATA = Annotation( + type=pd.DataFrame, + title='Input data', + description='A pandas.DataFrame containing `task`, `performer` and `label` columns', +) + + +def _make_opitonal_classlevel(annotation: Annotation): + return attr.evolve(annotation, type=ClassVar[Optional[annotation.type]]) + + +OPTIONAL_CLASSLEVEL_PERFORMERS_SKILLS = _make_opitonal_classlevel(PERFORMERS_SKILLS) +OPTIONAL_CLASSLEVEL_PROBAS = _make_opitonal_classlevel(PROBAS) +OPTIONAL_CLASSLEVEL_PRIORS = _make_opitonal_classlevel(PRIORS) +OPTIONAL_CLASSLEVEL_TASKS_LABELS = _make_opitonal_classlevel(TASKS_LABELS) +OPTIONAL_CLASSLEVEL_ERRORS = _make_opitonal_classlevel(ERRORS) diff --git a/src/aggregation/base_aggregator.py b/src/aggregation/base_aggregator.py new file mode 100644 index 00000000..e5c6090f --- /dev/null +++ b/src/aggregation/base_aggregator.py @@ -0,0 +1,89 @@ +__all__ = ['BaseAggregator'] + +import random +from typing import Union, Tuple + +import attr +import pandas as pd + +from . import annotations +from .annotations import manage_docstring + + +@attr.attrs(auto_attribs=True) +@manage_docstring +class BaseAggregator: + """Base functions and fields for all aggregators""" + + tasks_labels: annotations.OPTIONAL_CLASSLEVEL_TASKS_LABELS = None + probas: annotations.OPTIONAL_CLASSLEVEL_PROBAS = None + performers_skills: annotations.OPTIONAL_CLASSLEVEL_PERFORMERS_SKILLS = None + + @staticmethod + def _max_probas_random_on_ties(x: Union[pd.DataFrame, pd.Series]) -> Tuple[str, float]: + """Chooses max 'proba' value and return 'label' from same rows + If several rows have same 'proba' - choose random + """ + max_proba = x.proba.max() + max_label_index = random.choice(x[x.proba==max_proba].index) + return x.label[max_label_index], max_proba + + @manage_docstring + def _calculate_probabilities(self, estimated_answers: pd.DataFrame) -> annotations.PROBAS: + """Calculate probabilities for each task for each label + + Note: + All "score" must be positive. + If the sum of scores for a task is zero, then all probabilities for this task will be NaN. + + Args: + estimated_answers(pandas.DataFrame): Frame with "score" for each pair task-label. 
+ Should contain columns 'score', 'task', 'label' + + """ + assert (estimated_answers.score >= 0).all(), 'In answers exists some "score" with negative value' + + estimated_answers['proba'] = estimated_answers.score / estimated_answers.groupby('task').score.transform('sum') + self.probas = estimated_answers.pivot(index='task', columns='label', values='proba') + return self.probas + + @manage_docstring + def _choose_labels(self, labels_probas: annotations.PROBAS) -> annotations.TASKS_LABELS: + """Selection of the labels with the most probalitities""" + self.tasks_labels = labels_probas.idxmax(axis="columns").reset_index(name='label') + return self.tasks_labels + + @manage_docstring + def _calc_performers_skills(self, answers: pd.DataFrame, task_truth: pd.DataFrame) -> annotations.PERFORMERS_SKILLS: + """Calculates skill for each performer + + Note: + There can be only one * correct label * + + Args: + answers (pandas.DataFrame): performers answers for tasks + Should contain columns 'task', 'performer', 'label' + task_truth (pandas.DataFrame): label regarding which to count the skill + Should contain columns 'task', 'label' + Could contain column 'weight' + """ + def _agreed_on_task(x): + """Calculates performers agreed for each based on: + - result label in 'task_truth', + - performer label in 'answers', + - and 'weight' if it's exist + """ + return int(x['label'] == x['label_truth']) * x.get('weight', 1) + + answers_with_results = answers.merge(task_truth, on='task', suffixes=('', '_truth')) + answers_with_results['skill'] = answers_with_results.apply(_agreed_on_task, axis=1) + self.performers_skills = answers_with_results.groupby('performer')['skill'].agg('mean').reset_index() + return self.performers_skills + + def _answers_base_checks(self, answers: pd.DataFrame) -> None: + """Checks basic 'answers' dataset requirements""" + if not isinstance(answers, pd.DataFrame): + raise TypeError('Working only with pandas DataFrame') + assert 'task' in answers, 'There is no "task" column in answers' + assert 'performer' in answers, 'There is no "performer" column in answers' + assert 'label' in answers, 'There is no "label" column in answers' diff --git a/src/aggregation/base_aggregator.pyi b/src/aggregation/base_aggregator.pyi new file mode 100644 index 00000000..f2474239 --- /dev/null +++ b/src/aggregation/base_aggregator.pyi @@ -0,0 +1,82 @@ +from pandas.core.frame import DataFrame +from pandas.core.series import Series +from typing import ClassVar, Tuple, Union, Optional + +class BaseAggregator: + """Base functions and fields for all aggregators + Attributes: + tasks_labels (typing.ClassVar[typing.Optional[pandas.core.frame.DataFrame]]): Estimated labels + A pandas.DataFrame indexed by `task` with a single column `label` containing + `tasks`'s most probable label for last fitted data, or None otherwise. + + probas (typing.ClassVar[typing.Optional[pandas.core.frame.DataFrame]]): Estimated label probabilities + A frame indexed by `task` and a column for every label id found + in `data` such that `result.loc[task, label]` is the probability of `task`'s + true label to be equal to `label`. + + performers_skills (typing.ClassVar[typing.Optional[pandas.core.series.Series]]): Predicted skills for each performer + A series of performers' skills indexed by performers""" + + tasks_labels: ClassVar[Optional[DataFrame]] + probas: ClassVar[Optional[DataFrame]] + performers_skills: ClassVar[Optional[Series]] + + def __init__(self) -> None: + """Method generated by attrs for class BaseAggregator.""" + ... 
+
+    def _answers_base_checks(self, answers: DataFrame) -> None:
+        """Checks basic 'answers' dataset requirements"""
+        ...
+
+    def _calc_performers_skills(self, answers: DataFrame, task_truth: DataFrame) -> Series:
+        """Calculates the skill of each performer
+
+        Note:
+            There can be only one *correct label* per task.
+
+        Args:
+            answers (pandas.DataFrame): performers' answers for tasks
+                Should contain columns 'task', 'performer', 'label'
+            task_truth (pandas.DataFrame): labels against which the skill is calculated
+                Should contain columns 'task', 'label'
+                Could contain column 'weight'
+
+        Returns:
+            Series: Predicted skills for each performer
+            A series of performers' skills indexed by performers"""
+        ...
+
+    def _calculate_probabilities(self, estimated_answers: DataFrame) -> DataFrame:
+        """Calculates the probability of each label for each task
+
+        Note:
+            All "score" values must be non-negative.
+            If the sum of scores for a task is zero, then all probabilities for this task will be NaN.
+
+        Args:
+            estimated_answers(pandas.DataFrame): Frame with a "score" for each task-label pair.
+                Should contain columns 'score', 'task', 'label'
+
+        Returns:
+            DataFrame: Estimated label probabilities
+            A frame indexed by `task` and a column for every label id found
+            in `data` such that `result.loc[task, label]` is the probability of `task`'s
+            true label to be equal to `label`."""
+        ...
+
+    def _choose_labels(self, labels_probas: DataFrame) -> DataFrame:
+        """Selects the label with the highest probability for each task
+        Args:
+            labels_probas (DataFrame): Estimated label probabilities
+                A frame indexed by `task` and a column for every label id found
+                in `data` such that `result.loc[task, label]` is the probability of `task`'s
+                true label to be equal to `label`.
+
+        Returns:
+            DataFrame: Estimated labels
+            A pandas.DataFrame indexed by `task` with a single column `label` containing
+            `tasks`'s most probable label for last fitted data, or None otherwise."""
+        ...
+
+    @staticmethod
+    def _max_probas_random_on_ties(x: Union[DataFrame, Series]) -> Tuple[str, float]:
+        """Chooses the max 'proba' value and returns the corresponding 'label'.
+        If several rows have the same 'proba', one of them is chosen at random."""
+        ...
diff --git a/src/aggregation/base_embedding_aggregator.py b/src/aggregation/base_embedding_aggregator.py
new file mode 100644
index 00000000..c573a353
--- /dev/null
+++ b/src/aggregation/base_embedding_aggregator.py
@@ -0,0 +1,87 @@
+from typing import Any, Optional
+
+import numpy as np
+import pandas as pd
+from sklearn.neighbors import NearestNeighbors
+from tqdm.auto import tqdm
+
+from .base_aggregator import BaseAggregator
+
+
+class BaseEmbeddingAggregator(BaseAggregator):
+    """Base class for aggregation algorithms that operate on embeddings of performers' answers.
+
+    Attributes:
+        aggregated_embeddings_ (Optional[pd.Series]): result of embeddings aggregation for each task.
+        golden_embeddings_ (Optional[pd.Series]): embeddings of golden outputs if the golden outputs are provided.
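+        performers_reliabilities_ (Optional[pd.Series]): performers' reliabilities, initialized
+            with ones in `_init_performers_reliabilities` and re-estimated by descendant aggregators.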
+ """ + + def __init__(self, encoder: Any, silent: bool): + self.encoder = encoder + self.silent = silent + self.aggregated_embeddings_: Optional[pd.Series] = None + self.golden_embeddings_: Optional[pd.Series] = None + + def _answers_base_checks(self, answers: pd.DataFrame): + if not isinstance(answers, pd.DataFrame): + raise TypeError('Working only with pandas DataFrame') + assert 'task' in answers, 'There is no "task" column in answers' + assert 'performer' in answers, 'There is no "performer" column in answers' + assert 'output' in answers, 'There is no "output" column in answers' + + def _get_embeddings(self, answers: pd.DataFrame): + """Obtaines embeddings for performers answers. + """ + if not self.silent: + tqdm.pandas() + answers['embedding'] = answers.output.progress_apply(self.encoder.encode) + else: + answers['embedding'] = answers.output.apply(self.encoder.encode) + + def _get_golden_embeddings(self, answers: pd.DataFrame): + """Processes embeddings for golden outputs. + """ + if 'golden_embedding' not in answers: + golden_tasks = answers[answers['golden'].notna()][['task', 'golden']].drop_duplicates().set_index('task') + golden_tasks['golden_embedding'] = golden_tasks.golden.apply(self.encoder.encode) + else: + golden_tasks = answers[answers['golden'].notna()][['task', 'golden', 'golden_embedding']].drop_duplicates(['task']).set_index('task') + self.golden_embeddings_ = golden_tasks['golden_embedding'] + + def _init_performers_reliabilities(self, answers: pd.DataFrame): + """Initialize performers reliabilities by ones. + """ + performers = pd.unique(answers.performer) + self.performers_reliabilities_ = pd.Series(np.ones(len(performers)), index=performers) + + def _aggregate_embeddings(self, answers: pd.DataFrame): + """Calculates weighted average of embeddings for each task. + """ + answers['weighted_embeddings'] = answers.score * answers.embedding + self.aggregated_embeddings_ = answers.groupby('task').weighted_embeddings.apply(np.sum) / answers.groupby('task').score.sum() + if self.golden_embeddings_ is not None: + for task, embedding in self.golden_embeddings_.iteritems(): + self.aggregated_embeddings_[task] = embedding + + def _distance_from_aggregated(self, answers: pd.DataFrame): + """Calculates the square of Euclidian distance from aggregated embedding for each answer. + """ + with_task_aggregate = answers.set_index('task') + with_task_aggregate['task_aggregate'] = self.aggregated_embeddings_ + with_task_aggregate['distance'] = with_task_aggregate.apply(lambda row: np.sum((row['embedding'] - row['task_aggregate']) ** 2), axis=1) + with_task_aggregate['distance'] = with_task_aggregate['distance'].replace({0.0: 1e-5}) # avoid division by zero + return with_task_aggregate.reset_index() + + def _choose_nearest_output(self, answers, metric='cosine'): + """Choses nearest performers answer according to aggregated embeddings. 
+ """ + aggregated_output = [] + tasks = [] + for task, assignments in answers.groupby('task'): + embeddigs = np.array(list(assignments['embedding'])) + outputs = list(assignments['output']) + knn = NearestNeighbors(algorithm='brute', metric='cosine').fit(embeddigs) + _, res_ind = knn.kneighbors([self.aggregated_embeddings_[task]], 1) + aggregated_output.append(outputs[res_ind[0][0]]) + tasks.append(task) + return pd.Series(aggregated_output, index=tasks) diff --git a/src/aggregation/dawid_skene.py b/src/aggregation/dawid_skene.py new file mode 100644 index 00000000..39663553 --- /dev/null +++ b/src/aggregation/dawid_skene.py @@ -0,0 +1,94 @@ +__all__ = ['DawidSkene'] + +import numpy as np + +from . import annotations +from .annotations import manage_docstring, Annotation +from .base_aggregator import BaseAggregator +from .majority_vote import MajorityVote + +_EPS = np.float_power(10, -10) + + +@manage_docstring +class DawidSkene(BaseAggregator): + """ + Dawid-Skene aggregation model + A. Philip Dawid and Allan M. Skene. 1979. + Maximum Likelihood Estimation of Observer Error-Rates Using the EM Algorithm. + Journal of the Royal Statistical Society. Series C (Applied Statistics), Vol. 28, 1 (1979), 20–28. + + https://doi.org/10.2307/2346806 + """ + + probas: annotations.OPTIONAL_CLASSLEVEL_PROBAS + priors: annotations.OPTIONAL_CLASSLEVEL_PRIORS + task_labels: annotations.OPTIONAL_CLASSLEVEL_TASKS_LABELS + errors: annotations.OPTIONAL_CLASSLEVEL_ERRORS + + def __init__(self, n_iter: int): + """ + Args: + n_iter: Number of iterations to perform + """ + self.n_iter = n_iter + self.proba = None + self.priors = None + self.tasks_labels = None + self.errors = None + + @staticmethod + @manage_docstring + def _m_step(data: annotations.DATA, probas: annotations.PROBAS) -> annotations.ERRORS: + """Perform M-step of Dawid-Skene algorithm. + + Given performers' answers and tasks' true labels probabilities estimates + performer's errors probabilities matrix. + """ + joined = data.join(probas, on='task') + errors = joined.groupby(['performer', 'label'], sort=False).sum() + errors.clip(lower=_EPS, inplace=True) + errors /= errors.groupby('performer', sort=False).sum() + return errors + + @staticmethod + @manage_docstring + def _e_step(data: annotations.DATA, priors: annotations.PROBAS, errors: annotations.ERRORS) -> annotations.PROBAS: + """ + Perform E-step of Dawid-Skene algorithm. + + Given performer's answers, labels' prior probabilities and performer's performer's + errors probabilities matrix estimates tasks' true labels probabilities. 
+ """ + joined = data.join(errors, on=['performer', 'label']) + joined.drop(columns=['performer', 'label'], inplace=True) + probas = priors * joined.groupby('task', sort=False).prod() + return probas.div(probas.sum(axis=1), axis=0) + + @manage_docstring + def fit(self, data: annotations.DATA) -> Annotation(type='DawidSkene', title='self'): + + # Initialization + data = data[['task', 'performer', 'label']] + self.probas = MajorityVote().fit_predict_proba(data).fillna(0) + self.priors = self.probas.mean() + self.errors = self._m_step(data, self.probas) + + # Updating proba and errors n_iter times + for _ in range(self.n_iter): + self.probas = self._e_step(data, self.priors, self.errors) + self.priors = self.probas.mean() + self.errors = self._m_step(data, self.probas) + + # Saving results + self.task_labels = self._choose_labels(self.probas) + + return self + + @manage_docstring + def fit_predict_proba(self, data: annotations.DATA) -> annotations.PROBAS: + return self.fit(data).probas + + @manage_docstring + def fit_predict(self, data: annotations.DATA) -> annotations.TASKS_LABELS: + return self.fit(data).tasks_labels diff --git a/src/aggregation/dawid_skene.pyi b/src/aggregation/dawid_skene.pyi new file mode 100644 index 00000000..ea2307ab --- /dev/null +++ b/src/aggregation/dawid_skene.pyi @@ -0,0 +1,120 @@ +from crowdkit.aggregation.base_aggregator import BaseAggregator +from pandas.core.frame import DataFrame +from pandas.core.series import Series +from typing import ClassVar, Optional + + +class DawidSkene(BaseAggregator): + """Dawid-Skene aggregation model + A. Philip Dawid and Allan M. Skene. 1979. + Maximum Likelihood Estimation of Observer Error-Rates Using the EM Algorithm. + Journal of the Royal Statistical Society. Series C (Applied Statistics), Vol. 28, 1 (1979), 20–28. + + https://doi.org/10.2307/2346806 + Attributes: + probas (typing.ClassVar[typing.Optional[pandas.core.frame.DataFrame]]): Estimated label probabilities + A frame indexed by `task` and a column for every label id found + in `data` such that `result.loc[task, label]` is the probability of `task`'s + true label to be equal to `label`. + + priors (typing.ClassVar[typing.Optional[pandas.core.series.Series]]): A prior label distribution + A series of labels' probabilities indexed by labels + task_labels (typing.ClassVar[typing.Optional[pandas.core.frame.DataFrame]]): Estimated labels + A pandas.DataFrame indexed by `task` with a single column `label` containing + `tasks`'s most probable label for last fitted data, or None otherwise. + + errors (typing.ClassVar[typing.Optional[pandas.core.frame.DataFrame]]): Performers' error matrices + A pandas.DataFrame indexed by `performer` and `label` with a column for every + label_id found in `data` such that `result.loc[performer, observed_label, true_label]` + is the probability of `performer` producing an `observed_label` given that a task's + true label is `true_label`""" + + tasks_labels: ClassVar[Optional[DataFrame]] + probas: ClassVar[Optional[DataFrame]] + performers_skills: ClassVar[Optional[Series]] + priors: ClassVar[Optional[Series]] + task_labels: ClassVar[Optional[DataFrame]] + errors: ClassVar[Optional[DataFrame]] + + def __init__(self, n_iter: int): + """Args: + n_iter: Number of iterations to perform""" + ... + + @staticmethod + def _e_step(data: DataFrame, priors: DataFrame, errors: DataFrame) -> DataFrame: + """Perform E-step of Dawid-Skene algorithm. 
+ + Given performer's answers, labels' prior probabilities and performer's performer's + errors probabilities matrix estimates tasks' true labels probabilities. + Args: + data (DataFrame): Input data + A pandas.DataFrame containing `task`, `performer` and `label` columns + priors (DataFrame): Estimated label probabilities + A frame indexed by `task` and a column for every label id found + in `data` such that `result.loc[task, label]` is the probability of `task`'s + true label to be equal to `label`. + + errors (DataFrame): Performers' error matrices + A pandas.DataFrame indexed by `performer` and `label` with a column for every + label_id found in `data` such that `result.loc[performer, observed_label, true_label]` + is the probability of `performer` producing an `observed_label` given that a task's + true label is `true_label` + + Returns: + DataFrame: Estimated label probabilities + A frame indexed by `task` and a column for every label id found + in `data` such that `result.loc[task, label]` is the probability of `task`'s + true label to be equal to `label`.""" + ... + + @staticmethod + def _m_step(data: DataFrame, probas: DataFrame) -> DataFrame: + """Perform M-step of Dawid-Skene algorithm. + + Given performers' answers and tasks' true labels probabilities estimates + performer's errors probabilities matrix. + Args: + data (DataFrame): Input data + A pandas.DataFrame containing `task`, `performer` and `label` columns + probas (DataFrame): Estimated label probabilities + A frame indexed by `task` and a column for every label id found + in `data` such that `result.loc[task, label]` is the probability of `task`'s + true label to be equal to `label`. + + Returns: + DataFrame: Performers' error matrices + A pandas.DataFrame indexed by `performer` and `label` with a column for every + label_id found in `data` such that `result.loc[performer, observed_label, true_label]` + is the probability of `performer` producing an `observed_label` given that a task's + true label is `true_label`""" + ... + + def fit(self, data: DataFrame) -> 'DawidSkene': + """Args: + data (DataFrame): Input data + A pandas.DataFrame containing `task`, `performer` and `label` columns + Returns: + DawidSkene: self""" + ... + + def fit_predict(self, data: DataFrame) -> DataFrame: + """Args: + data (DataFrame): Input data + A pandas.DataFrame containing `task`, `performer` and `label` columns + Returns: + DataFrame: Estimated labels + A pandas.DataFrame indexed by `task` with a single column `label` containing + `tasks`'s most probable label for last fitted data, or None otherwise.""" + ... + + def fit_predict_proba(self, data: DataFrame) -> DataFrame: + """Args: + data (DataFrame): Input data + A pandas.DataFrame containing `task`, `performer` and `label` columns + Returns: + DataFrame: Estimated label probabilities + A frame indexed by `task` and a column for every label id found + in `data` such that `result.loc[task, label]` is the probability of `task`'s + true label to be equal to `label`.""" + ... diff --git a/src/aggregation/gold_majority_vote.py b/src/aggregation/gold_majority_vote.py new file mode 100644 index 00000000..f616de20 --- /dev/null +++ b/src/aggregation/gold_majority_vote.py @@ -0,0 +1,121 @@ +__all__ = ['GoldMajorityVote'] + +import attr +import pandas as pd + +from . 
import annotations
+from .annotations import manage_docstring
+from .base_aggregator import BaseAggregator
+
+
+@attr.attrs(auto_attribs=True)
+@manage_docstring
+class GoldMajorityVote(BaseAggregator):
+    """Majority Vote for when a golden dataset (ground truth) exists for some tasks
+
+    Calculates the probability of a correct label for each performer based on the golden set.
+    Based on this, for each task, calculates the sum of the probabilities of each label.
+    The correct label is the one with the greatest sum of probabilities.
+
+    For example: you have 10k tasks completed by 3k different performers, and you have 100 tasks
+    where you already know the ground truth labels. First call 'fit' to calculate the fraction of
+    correct labels for each performer. Then call 'predict' to calculate labels for your 10k tasks.
+
+    It's necessary that:
+    1. All performers have completed at least one task from the golden dataset.
+    2. All performers in the dataset sent to 'predict' exist in the answers dataset sent to 'fit'
+
+    After fit, 'performers_skills' is stored - the estimated skill of each performer.
+
+    After predicting, different data frames are stored (details in BaseAggregator):
+        tasks_labels: Predicted labels for each task
+        probas: Probabilities for each label for task
+    """
+
+    def fit(self, answers_on_gold: pd.DataFrame, gold_df: pd.DataFrame) -> 'GoldMajorityVote':
+        """Calculates the skill of each performer, based on answers for the golden dataset
+        The calculated skills are stored in the instance and can be obtained from the field 'performers_skills'
+        After 'fit' you can get 'performers_skills' from the class field.
+
+        Args:
+            answers_on_gold(pandas.DataFrame): Frame containing performers' answers for golden tasks. One row per answer.
+                Should contain columns 'performer', 'task', 'label'. The dataframe may also contain answers for
+                non-golden tasks; these answers will be ignored.
+            gold_df(pandas.DataFrame): Frame with ground truth labels for tasks.
+                Should contain columns 'task', 'label'. May contain column 'weight', if you have different scores
+                for different tasks.
+        Returns:
+            GoldMajorityVote: self, to allow chaining calls
+
+        Raises:
+            TypeError: If the input datasets are not of type pandas.DataFrame.
+            AssertionError: If a column is missing in the dataframes, or if it's impossible to calculate the
+                skill for some performer. For example, some performers have no answers for tasks from the golden dataset.
+        """
+        self._answers_base_checks(answers_on_gold)
+
+        if not isinstance(gold_df, pd.DataFrame):
+            raise TypeError('"gold_df" parameter must be of type pandas DataFrame')
+        assert 'task' in gold_df, 'There is no "task" column in "gold_df"'
+        assert 'label' in gold_df, 'There is no "label" column in "gold_df"'
+
+        # checking that we can compute skills for all performers
+        answers_with_truth = answers_on_gold.merge(gold_df, on='task', suffixes=('', '_truth'))
+        performers_without_skill = set(answers_on_gold['performer'].unique()) - set(answers_with_truth['performer'].unique())
+        assert not performers_without_skill, 'It is impossible to compute skills for some performers in "answers_on_gold"'\
+            ' because those performers did not complete any golden task (they have no tasks in "gold_df")'
+
+        self._calc_performers_skills(answers_on_gold, gold_df)
+        return self
+
+    @manage_docstring
+    def predict(self, data: annotations.DATA) -> annotations.TASKS_LABELS:
+        """Predicts the correct label for each task, using the performers' skills calculated by 'fit'.
+        After 'predict' you can get probabilities for all labels from the class field 'probas'.
+
+        Raises:
+            TypeError: If answers is not a pandas.DataFrame
+            AssertionError: If a column is missing in 'answers'.
+                Or if 'predict' is called before 'fit'.
+                Or if there are new performers in 'answers' that were not in 'answers_on_gold' in 'fit'.
+        """
+        self._predict_impl(data)
+        return self.tasks_labels
+
+    @manage_docstring
+    def predict_proba(self, data: annotations.DATA) -> annotations.PROBAS:
+        """Calculates the probability of each label for each task.
+        If no performer gave some label for a task, the task has no probability for this label.
+        After 'predict_proba' you can get the predicted labels from the class field 'tasks_labels'.
+
+        Raises:
+            TypeError: If answers is not a pandas.DataFrame
+            AssertionError: If a column is missing in 'answers'.
+                Or if 'predict' is called before 'fit'.
+                Or if there are new performers in 'answers' that were not in 'answers_on_gold' in 'fit'.
+        """
+        self._predict_impl(data)
+        return self.probas
+
+    @manage_docstring
+    def _predict_impl(self, answers: annotations.DATA) -> None:
+        self._answers_base_checks(answers)
+
+        assert self.performers_skills is not None, '"Predict" called without "fit".'
+
+        # checking that all performers in `answers` have skills in "performers_skills"
+        performers_without_skill_in_crowd = set(answers['performer'].unique()) - set(self.performers_skills['performer'].unique())
+        assert not performers_without_skill_in_crowd, 'Unknown skill for some performers in "answers"'\
+            ' because those performers have no answers for golden tasks'
+
+        # join labels and skills
+        labels_probas = answers.merge(self.performers_skills, on='performer')
+        labels_probas = (
+            labels_probas
+            .groupby(['task', 'label'])
+            .agg({'skill': sum})
+            .reset_index()
+            .rename(columns={'skill': 'score'}))
+
+        labels_probas = self._calculate_probabilities(labels_probas)
+        self._choose_labels(labels_probas)
diff --git a/src/aggregation/gold_majority_vote.pyi b/src/aggregation/gold_majority_vote.pyi
new file mode 100644
index 00000000..f8ab5179
--- /dev/null
+++ b/src/aggregation/gold_majority_vote.pyi
@@ -0,0 +1,103 @@
+import attr
+import pandas
+import crowdkit.aggregation.annotations
+
+from pandas.core.frame import DataFrame
+from pandas.core.series import Series
+from crowdkit.aggregation.base_aggregator import BaseAggregator
+from typing import Optional
+
+
+class GoldMajorityVote(BaseAggregator):
+    """Majority Vote for when a golden dataset (ground truth) exists for some tasks
+
+    Calculates the probability of a correct label for each performer based on the golden set.
+    Based on this, for each task, calculates the sum of the probabilities of each label.
+    The correct label is the one with the greatest sum of probabilities.
+
+    For example: you have 10k tasks completed by 3k different performers, and you have 100 tasks
+    where you already know the ground truth labels. First call 'fit' to calculate the fraction of
+    correct labels for each performer. Then call 'predict' to calculate labels for your 10k tasks.
+
+    It's necessary that:
+    1. All performers have completed at least one task from the golden dataset.
+    2. All performers in the dataset sent to 'predict' exist in the answers dataset sent to 'fit'
+
+    After fit, 'performers_skills' is stored - the estimated skill of each performer.
+ + After predicting stored different data frames (details in BaseAggregator): + tasks_labels: Predicted labels for each task + probas: Probabilities for each label for task""" + + tasks_labels: Optional[DataFrame] + probas: Optional[DataFrame] + performers_skills: Optional[Series] + + def __init__(self, tasks_labels: Optional[DataFrame], probas: Optional[DataFrame], performers_skills: Optional[Series]) -> None: + """Method generated by attrs for class GoldMajorityVote.""" + ... + + def _predict_impl(self, answers: DataFrame) -> NoneType: + """Args: + answers (DataFrame): Input data + A pandas.DataFrame containing `task`, `performer` and `label` columns""" + ... + + def fit(self, answers_on_gold: DataFrame, gold_df: DataFrame) -> 'GoldMajorityVote': + """Calculates the skill for each performers, based on answers on golden dataset + The calculated skills are stored in an instance of the class and can be obtained by the field 'performers_skills' + After 'fit' you can get 'performer_skills' from class field. + + Args: + answers_on_gold(pandas.DataFrame): Frame contains performers answers on golden tasks. One row per answer. + Should contain columns 'performer', 'task', 'label'. Dataframe could contains answers not only for golden + tasks. This answers will be ignored. + gold_df(pandas.DataFrame): Frame with ground truth labels for tasks. + Should contain columns 'performer', 'task'. And may contain column 'weight', if you have different scores + for different tasks. + Returns: + GoldMajorityVote: self for call next methods + + Raises: + TypeError: If the input datasets are not of type pandas.DataFrame. + AssertionError: If there is some collumn missing in 'dataframes'. Or if it's impossible to calculate the + skill for any performer. For example, some performers do not have answers to tasks from the golden dataset.""" + ... + + def predict(self, data: DataFrame) -> DataFrame: + """Predict correct labels for tasks. Using calculated performers skill, stored in self instance. + After 'predict' you can get probabilities for all labels from class field 'probas'. + + Raises: + TypeError: If answers don't has pandas.DataFrame type + AssertionError: If there is some collumn missing in 'answers'. + Or when 'predict' called without 'fit'. + Or if there are new performers in 'answer' that were not in 'answers_on_gold' in 'fit'. + Args: + data (DataFrame): Input data + A pandas.DataFrame containing `task`, `performer` and `label` columns + Returns: + DataFrame: Estimated labels + A pandas.DataFrame indexed by `task` with a single column `label` containing + `tasks`'s most probable label for last fitted data, or None otherwise.""" + ... + + def predict_proba(self, data: DataFrame) -> DataFrame: + """Calculates Probabilities for each label of task. + If it was no such label for some task, this task doesn't has probs for this label. + After 'predict_proba' you can get predicted labels from class field 'tasks_labels'. + + Raises: + TypeError: If answers don't has pandas.DataFrame type + AssertionError: If there is some collumn missing in 'answers'. + Or when 'predict' called without 'fit'. + Or if there are new performers in 'answer' that were not in 'answers_on_gold' in 'fit'. 
+ Args: + data (DataFrame): Input data + A pandas.DataFrame containing `task`, `performer` and `label` columns + Returns: + DataFrame: Estimated label probabilities + A frame indexed by `task` and a column for every label id found + in `data` such that `result.loc[task, label]` is the probability of `task`'s + true label to be equal to `label`.""" + ... diff --git a/src/aggregation/hrrasa.py b/src/aggregation/hrrasa.py new file mode 100644 index 00000000..e5a6054a --- /dev/null +++ b/src/aggregation/hrrasa.py @@ -0,0 +1,202 @@ +from typing import Any, Iterator, Tuple, Union +import pandas as pd +import scipy.stats as sps +import numpy as np +from tqdm.auto import tqdm +import nltk.translate.gleu_score as gleu +# from sentence_transformers import SentenceTransformer + +from .base_embedding_aggregator import BaseEmbeddingAggregator + + +def glue_similarity(hyp, ref): + return gleu.sentence_gleu([hyp.split()], ref) + + +class HRRASA(BaseEmbeddingAggregator): + """ + Hybrid Reliability and Representation Aware Sequence Aggregation + Jiyi Li. 2020. + Crowdsourced Text Sequence Aggregation based on Hybrid Reliability and Representation. + Proceedings of the 43rd International ACM SIGIR Conference on Research and Development + in Information Retrieval (SIGIR ’20), July 25–30, 2020, Virtual Event, China. ACM, New York, NY, USA, + + https://doi.org/10.1145/3397271.3401239 + """ + + def __init__(self, n_iter: int = 100, encoder: Any = None, output_similarity: Any = glue_similarity, lambda_emb: float = 0.5, lambda_out: float = 0.5, alpha: float = 0.05, silent: bool = True): + """ + Args: + n_iter: A number of iterations. Default: 100. + encoder: A class that encodes a given performer's output into a fixed-size vector (ndarray) + with `encode` method. default: `paraphrase-distilroberta-base-v1` from sentence-transformers. + output_similarity: a similarity metric on raw outputs. A function that takes two arguments: performer's + outputs and returns a single number — a similarity measure. default: GLUE. + lambda_emb: embedding reliablity weight. default: 0.5. + lambda_out: raw output reliability weight. default: 0.5. + alpha: confidence level for processing performers' reliabilities. + silent: if not set, shows progress-bar during the training. default: True. + """ + super(HRRASA, self).__init__(encoder, silent) + self.n_iter = n_iter + + self._output_similarity = output_similarity + self.lambda_emb = lambda_emb + self.lambda_out = lambda_out + self.alpha = alpha + + def fit_predict(self, answers: pd.DataFrame, return_ranks: bool = False) -> Union[pd.Series, pd.DataFrame]: + """ + Args: + answers: A pandas.DataFrame containing `task`, `performer` and `output` columns. + If the `embedding` column exists, embeddings are not obtained by the `encoder`. + golden_embeddings: A pandas Series containing embeddings of golden outputs with + `task` as an index. If is not passed, embeddings are computed by the `encoder`. + return_ranks: if `True` returns ranking score for each of performers answers. + + Returns: + If `return_ranks=False`, pandas.Series indexed by `task` with values — aggregated outputs. + If `return_ranks=True`, pandas.DataFrame with columns `task`, `performer`, `output`, `rank`. 
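+
+        Example (illustrative; `my_encoder` stands in for any object with an `encode`
+        method, e.g. a sentence-transformers model):
+            >>> aggregated = HRRASA(n_iter=10, encoder=my_encoder).fit_predict(answers)
+            >>> ranks = HRRASA(n_iter=10, encoder=my_encoder).fit_predict(answers, return_ranks=True)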
+ """ + processed_answers = self._preprocess_answers(answers) + return self._fit_impl(processed_answers, return_ranks=return_ranks) + + def _fit_impl(self, answers: pd.DataFrame, use_local_reliability: bool = True, return_ranks: bool = False) -> Union[pd.Series, pd.DataFrame]: + self.use_local_reliability = use_local_reliability + if use_local_reliability: + answers = self._get_local_reliabilities(answers) + self.performers_prior_reliability_ = answers.groupby('performer').count()['task'].apply(lambda x: sps.chi2.isf(self.alpha / 2, x)) + self.performers_reliabilities_ = pd.Series(1.0, index=pd.unique(answers.performer)) + answers = self._calc_score(answers) + + for _ in range(self.n_iter) if self.silent else tqdm(range(self.n_iter)): + self._aggregate_embeddings(answers) + self._update_reliabilities(answers) + answers = self._calc_score(answers) + + if not return_ranks: + return self._choose_nearest_output(answers) + else: + return self._rank_outputs(answers) + + def _rank_outputs(self, answers: pd.DataFrame) -> pd.DataFrame: + """Returns ranking score for each record in `answers` data frame. + """ + answers = self._distance_from_aggregated(answers) + answers['norms_prod'] = answers.apply(lambda row: np.sum(row['embedding'] ** 2) * np.sum(row['task_aggregate'] ** 2), axis=1) + answers['rank'] = answers.performer_reliability * np.exp(-answers.distance / answers.norms_prod) + answers.local_reliability + return answers[['task', 'performer', 'output', 'rank']] + + def _calc_score(self, answers: pd.DataFrame) -> pd.DataFrame: + """Calculates the weight for every embedding according to its local and global reliabilities. + """ + answers = answers.set_index('performer') + answers['performer_reliability'] = self.performers_reliabilities_ + answers = answers.reset_index() + if self.use_local_reliability: + answers['score'] = answers['performer_reliability'] * answers['local_reliability'] + else: + answers['score'] = answers['performer_reliability'] + return answers + + def _update_reliabilities(self, answers: pd.DataFrame) -> None: + """Estimates global reliabilities by aggregated embeddings. + """ + distances = self._distance_from_aggregated(answers) + if self.use_local_reliability: + distances['distance'] = distances['distance'] / distances['local_reliability'] + total_distance = distances.groupby('performer').distance.apply(np.sum) + self.performers_reliabilities_ = self.performers_prior_reliability_ / total_distance + + def _preprocess_answers(self, answers: pd.DataFrame) -> pd.DataFrame: + """Does basic checks for given data and obtaines embeddings if they are not provided. + """ + self._answers_base_checks(answers) + assert not ('golden' in answers and 'golden_embedding' not in answers and self.encoder is None), 'Provide encoder or golden_embeddings' + processed_answers = answers.copy(deep=False) + if 'embedding' not in answers: + assert self.encoder is not None, 'Provide encoder or embedding column' + self._get_embeddings(processed_answers) + if 'golden' in answers: + self._get_golden_embeddings(processed_answers) + return processed_answers + + def _get_local_reliabilities(self, answers: pd.DataFrame) -> pd.DataFrame: + """Computes local (relative) reliabilities for each task's answer. 
+ """ + index = [] + local_reliabilities = [] + processed_pairs = set() + for task, task_answers in answers.groupby('task'): + for performer, reliability in self._local_reliabilities_on_task(task_answers): + if (task, performer) not in processed_pairs: + local_reliabilities.append(reliability) + index.append((task, performer)) + processed_pairs.add((task, performer)) + answers = answers.set_index(['task', 'performer']) + local_reliabilities = pd.Series(local_reliabilities, index=pd.MultiIndex.from_tuples(index, names=['task', 'performer'])) + answers['local_reliability'] = local_reliabilities + return answers.reset_index() + + def _local_reliabilities_on_task(self, task_answers: pd.DataFrame) -> Iterator[Tuple[Any, float]]: + overlap = len(task_answers) + for _, cur_row in task_answers.iterrows(): + performer = cur_row['performer'] + emb_sum = 0.0 + seq_sum = 0.0 + emb = cur_row['embedding'] + seq = cur_row['output'] + emb_norm = np.sum(emb ** 2) + for __, other_row in task_answers.iterrows(): + if other_row['performer'] == performer: + continue + other_emb = other_row['embedding'] + other_seq = other_row['output'] + + # embeddings similarity + diff_norm = np.sum((emb - other_emb) ** 2) + other_norm = np.sum(other_emb ** 2) + emb_sum += np.exp(-diff_norm / (emb_norm * other_norm)) + + # sequence similarity + seq_sum += self._output_similarity(seq, other_seq) + emb_sum /= (overlap - 1) + seq_sum /= (overlap - 1) + + yield performer, self.lambda_emb * emb_sum + self.lambda_out * seq_sum + + +class RASA(HRRASA): + """ + Representation Aware Sequence Aggregation + Jiyi Li and Fumiyo Fukumoto. 2019. + A Dataset of Crowdsourced Word Sequences: Collections and Answer Aggregation for Ground Truth Creation + Proceedings of the First Workshop on Aggregating and Analysing Crowdsourced Annotations for NLP. 24–28. + + https://doi.org/10.18653/v1/D19-5904 + + """ + def __init__(self, n_iter: int = 100, encoder: Any = None, alpha: float = 0.05, silent: bool = True): + """ + Args: + n_iter: A number of iterations. Default: 100. + encoder: A class that encodes a given performer's output into a fixed-size vector (ndarray) + with `encode` method. default: `paraphrase-distilroberta-base-v1` from sentence-transformers. + alpha: confidence level for processing performers' reliabilities. + silent: if not set, shows progress-bar during the training. Default: True. + """ + super(RASA, self).__init__(n_iter, encoder, alpha=alpha, silent=silent) + + def fit_predict(self, answers: pd.DataFrame) -> pd.DataFrame: + """ + Args: + answers: A pandas.DataFrame containing `task`, `performer` and `output` columns. + If the `embedding` column exists, embeddings are not obtained by the `encoder`. + golden_embeddings: A pandas Series containing embeddings of golden outputs with + `task` as an index. If is not passed, embeddings are computed by the `encoder`. + + Returns: + pandas.Series indexed by `task` with values — aggregated outputs. 
+ """ + processed_answers = self._preprocess_answers(answers) + return self._fit_impl(processed_answers, use_local_reliability=False) diff --git a/src/aggregation/m_msr.py b/src/aggregation/m_msr.py new file mode 100644 index 00000000..799b2585 --- /dev/null +++ b/src/aggregation/m_msr.py @@ -0,0 +1,289 @@ +import attr +import numpy as np +import pandas as pd +import scipy.sparse.linalg as sla +import scipy.stats as sps +from typing import Optional, Tuple + +from .base_aggregator import BaseAggregator +from .majority_vote import MajorityVote + + +@attr.attrs(auto_attribs=True) +class MMSR(BaseAggregator): + """ + Matrix Mean-Subsequence-Reduced Algorithm + Qianqian Ma and Alex Olshevsky. 2020. + Adversarial Crowdsourcing Through Robust Rank-One Matrix Completion + 34th Conference on Neural Information Processing Systems (NeurIPS 2020) + https://arxiv.org/abs/2010.12181 + + Input: + - crowd-dataframe [task, performer, label] + - n_iter - optional, the number of iterations to stop after + - eps - optional, threshold in change to stop the algorithm + Output: + - result-dataframe - [task, label] + """ + n_iter: int = 10000 + eps: float = 1e-10 + random_state: Optional[int] = 0 + _observation_matrix: np.ndarray = np.array([]) + _covariation_matrix: np.ndarray = np.array([]) + _n_common_tasks: np.ndarray = np.array([]) + _n_performers: int = 0 + _n_tasks: int = 0 + _n_labels: int = 0 + _labels_mapping: dict = dict() + _performers_mapping: dict = dict() + _tasks_mapping: dict = dict() + + def fit(self, answers: pd.DataFrame) -> 'MMSR': + """Calculates the skill for each performers through rank-one matrix completion + The calculated skills are stored in an instance of the class and can be obtained by the field 'performers_skills' + After 'fit' you can get 'performer_skills' from class field. + + Args: + answers(pandas.DataFrame): Frame contains performers answers. One row per answer. + Should contain columns 'performer', 'task', 'label'. + Returns: + MMSR: self for call next methods + + Raises: + TypeError: If the input datasets are not of type pandas.DataFrame. + AssertionError: If there is some collumn missing in 'dataframes'. + """ + self._answers_base_checks(answers) + self._fit_impl(answers) + return self + + def predict(self, answers: pd.DataFrame) -> pd.DataFrame: + """Predict correct labels for tasks. Using calculated performers skill, stored in self instance. + After 'predict' you can get probabilities for all labels from class field 'probas'. + + Args: + answers(pandas.DataFrame): Frame with performers answers on task. One row per answer. + Should contain columns 'performer', 'task', 'label' + + Returns: + pandas.DataFrame: Predicted label for each task. + - task - unique values from input dataset + - label - most likely label + + Raises: + TypeError: If answers don't has pandas.DataFrame type + AssertionError: If there is some collumn missing in 'answers'. + Or when 'predict' called without 'fit'. + Or if there are new performers in 'answer' that were not in 'answers' in 'fit'. + """ + self._answers_base_checks(answers) + return self._predict_impl(answers) + + def predict_proba(self, answers: pd.DataFrame) -> pd.DataFrame: + """Calculates probabilities for each label of task. + After 'predict_proba' you can get predicted labels from class field 'tasks_labels'. + + Args: + answers(pandas.DataFrame): Frame with performers answers on task. One row per answer. 
+ Should contain columns 'performer', 'task', 'label' + + Returns: + pandas.DataFrame: Scores for each task and the likelihood of correctness. + - task - as dataframe index + - label - as dataframe columns + - proba - dataframe values + + Raises: + TypeError: If answers don't has pandas.DataFrame type + AssertionError: If there is some collumn missing in 'answers'. + Or when 'predict' called without 'fit'. + Or if there are new performers in 'answer' that were not in 'answers' in 'fit'. + """ + self._answers_base_checks(answers) + self._predict_impl(answers) + return self.probas + + def fit_predict(self, answers: pd.DataFrame) -> pd.DataFrame: + """Performes 'fit' and 'predict' in one call. + + Args: + answers(pandas.DataFrame): Frame with performers answers on task. One row per answer. + Should contain columns 'performer', 'task', 'label' + + Returns: + pandas.DataFrame: Predicted label for each task. + - task - unique values from input dataset + - label - most likely label + + Raises: + TypeError: If answers don't has pandas.DataFrame type + AssertionError: If there is some collumn missing in 'answers'. + """ + self._answers_base_checks(answers) + self._fit_impl(answers) + return self._predict_impl(answers) + + def fit_predict_proba(self, answers: pd.DataFrame) -> pd.DataFrame: + """Performes 'fit' and 'predict_proba' in one call. + + Args: + answers(pandas.DataFrame): Frame with performers answers on task. One row per answer. + Should contain columns 'performer', 'task', 'label' + + Returns: + pandas.DataFrame: Scores for each task and the likelihood of correctness. + - task - as dataframe index + - label - as dataframe columns + - proba - dataframe values + + Raises: + TypeError: If answers don't has pandas.DataFrame type + AssertionError: If there is some collumn missing in 'answers'. 
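+
+        Example (illustrative; toy identifiers):
+            >>> mmsr = MMSR(n_iter=1000)
+            >>> probas = mmsr.fit_predict_proba(answers)
+            >>> probas.loc[some_task].idxmax()  # the most probable label for a task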
+ """ + self._answers_base_checks(answers) + self._fit_impl(answers) + self._predict_impl(answers) + return self.probas + + def _fit_impl(self, answers: pd.DataFrame) -> None: + self._construnct_covariation_matrix(answers) + self._m_msr() + + def _predict_impl(self, answers: pd.DataFrame) -> pd.DataFrame: + weighted_mv = MajorityVote() + labels = weighted_mv.fit_predict(answers, self._performers_weights) + self.tasks_labels = labels + self.probas = weighted_mv.probas + return self.tasks_labels + + def _m_msr(self) -> None: + F_param = int(np.floor(self._sparsity / 2)) - 1 + n, m = self._covariation_matrix.shape + u = sps.uniform.rvs(size=(n, 1), random_state=self.random_state) + v = sps.uniform.rvs(size=(m, 1), random_state=self.random_state) + observed_entries = np.abs(np.sign(self._n_common_tasks)) == 1 + X = np.abs(self._covariation_matrix) + + for _ in range(self.n_iter): + v_prev = v + u_prev = u + for j in range(n): + target_v = X[:, j] + target_v = target_v[observed_entries[:, j]] / u[observed_entries[:, j]] + + y = self._remove_largest_and_smallest_F_value(target_v, F_param, v[j][0], self._n_tasks) + if len(y) == 0: + v[j] = v[j] + else: + v[j][0] = y.mean() + + for i in range(m): + target_u = X[i, :].reshape(-1, 1) + target_u = target_u[observed_entries[i, :].ravel()] / v[observed_entries[i, :].ravel()] + y = self._remove_largest_and_smallest_F_value(target_u, F_param, u[i][0], self._n_tasks) + if len(y) == 0: + u[i] = u[i] + else: + u[i][0] = y.mean() + + if np.linalg.norm(u @ v.T - u_prev @ v_prev.T, ord='fro') < self.eps: + break + + k = np.sqrt(np.linalg.norm(u) / np.linalg.norm(v)) + x_track_1 = u / k + x_track_2 = self._sign_determination_valid(self._covariation_matrix, x_track_1) + x_track_3 = np.minimum(x_track_2, 1 - 1. / np.sqrt(self._n_tasks)) + x_MSR = np.maximum(x_track_3, -1 / (self._n_labels - 1) + 1. / np.sqrt(self._n_tasks)) + + performers_probas = x_MSR * (self._n_labels - 1) / (self._n_labels) + 1 / self._n_labels + performers_probas = performers_probas.ravel() + self._set_skills_from_array(performers_probas) + self._set_performers_weights() + + def _set_skills_from_array(self, array: np.ndarray) -> None: + inverse_performers_mapping = {ind: performer for performer, ind in self._performers_mapping.items()} + self.performers_skills = pd.DataFrame( + [ + [inverse_performers_mapping[ind], array[ind]] + for ind in range(len(array)) + ], + columns=['performer', 'skill'] + ) + + def _set_performers_weights(self) -> None: + self._performers_weights = self.performers_skills.copy().rename(columns={'skill': 'weight'}) + self._performers_weights['weight'] = self._performers_weights['weight'] * (self._n_labels - 1) / (self._n_labels) + 1 / self._n_labels + + @staticmethod + def _sign_determination_valid(C: np.ndarray, s_abs: np.ndarray) -> np.ndarray: + S = np.sign(C) + n = len(s_abs) + + valid_idx = np.where(np.sum(C, axis=1) != 0)[0] + S_valid = S[valid_idx[:, None], valid_idx] + k = S_valid.shape[0] + upper_idx = np.triu(np.ones(shape=(k, k))) + S_valid_upper = S_valid * upper_idx + new_node_end_I, new_node_end_J = np.where(S_valid_upper == 1) + S_valid[S_valid == 1] = 0 + I = np.eye(k) + S_valid_new = I[new_node_end_I, :] + I[new_node_end_J, :] + m = S_valid_new.shape[0] + A = np.vstack((np.hstack((np.abs(S_valid), S_valid_new.T)), np.hstack((S_valid_new, np.zeros(shape=(m, m)))))) + n_new = A.shape[0] + W = (1. 
+        W = (1. / np.sum(A, axis=1)).reshape(-1, 1) @ np.ones(shape=(1, n_new)) * A
+        D, V = sla.eigs(W + np.eye(n_new), 1, which='SM')
+        V = V.real
+        sign_vector = np.sign(V)
+        s_sign = np.zeros(shape=(n, 1))
+        s_sign[valid_idx] = np.sign(np.sum(sign_vector[:k])) * s_abs[valid_idx] * sign_vector[:k]
+        return s_sign
+
+    @staticmethod
+    def _remove_largest_and_smallest_F_value(x, F, a, n_tasks) -> np.ndarray:
+        y = np.sort(x, axis=0)
+        if np.sum(y < a) < F:
+            y = y[y[:, 0] >= a]
+        else:
+            y = y[F:]
+
+        m = y.shape[0]
+        if np.sum(y > a) < F:
+            y = y[y[:, 0] <= a]
+        else:
+            y = np.concatenate((y[:m - F], y[m:]), axis=0)
+        if len(y) == 1 and y[0][0] == 0:
+            y[0][0] = 1 / np.sqrt(n_tasks)
+        return y
+
+    def _construct_covariation_matrix(self, answers: pd.DataFrame) -> None:
+        labels = pd.unique(answers.label)
+        self._n_labels = len(labels)
+        self._labels_mapping = {labels[idx]: idx + 1 for idx in range(self._n_labels)}
+
+        performers = pd.unique(answers.performer)
+        self._n_performers = len(performers)
+        self._performers_mapping = {performers[idx]: idx for idx in range(self._n_performers)}
+
+        tasks = pd.unique(answers.task)
+        self._n_tasks = len(tasks)
+        self._tasks_mapping = {tasks[idx]: idx for idx in range(self._n_tasks)}
+
+        self._observation_matrix = np.zeros(shape=(self._n_performers, self._n_tasks))
+        for i, row in answers.iterrows():
+            self._observation_matrix[self._performers_mapping[row['performer']]][self._tasks_mapping[row['task']]] = self._labels_mapping[row['label']]
+
+        self._n_common_tasks = np.sign(self._observation_matrix) @ np.sign(self._observation_matrix).T
+        self._n_common_tasks -= np.diag(np.diag(self._n_common_tasks))
+        self._sparsity = np.min(np.sign(self._n_common_tasks).sum(axis=0))
+
+        # Can we rewrite it in matrix operations?
+        self._covariation_matrix = np.zeros(shape=(self._n_performers, self._n_performers))
+        for i in range(self._n_performers):
+            for j in range(self._n_performers):
+                if self._n_common_tasks[i][j]:
+                    valid_idx = np.sign(self._observation_matrix[i]) * np.sign(self._observation_matrix[j])
+                    self._covariation_matrix[i][j] = np.sum((self._observation_matrix[i] == self._observation_matrix[j]) * valid_idx) / self._n_common_tasks[i][j]
+
+        self._covariation_matrix *= self._n_labels / (self._n_labels - 1)
+        self._covariation_matrix -= np.ones(shape=(self._n_performers, self._n_performers)) / (self._n_labels - 1)
diff --git a/src/aggregation/majority_vote.py b/src/aggregation/majority_vote.py
new file mode 100644
index 00000000..89b5bfb9
--- /dev/null
+++ b/src/aggregation/majority_vote.py
@@ -0,0 +1,80 @@
+import attr
+import pandas as pd
+from typing import Optional
+
+from .base_aggregator import BaseAggregator
+
+
+@attr.attrs(auto_attribs=True)
+class MajorityVote(BaseAggregator):
+    """
+    Majority Vote - chooses the label for which the most performers voted
+
+    After predicting, different data frames are stored (details in BaseAggregator):
+        tasks_labels: Predicted labels for each task
+        probas: Probabilities of each label for each task
+        performers_skills: Estimated skill for each performer
+    """
+
+    def fit_predict(self, answers: pd.DataFrame, performers_weights: Optional[pd.DataFrame] = None) -> pd.DataFrame:
+        """Chooses the label for which the most performers (or the greatest total weight) voted
+
+        Args:
+            answers(pandas.DataFrame): Frame with performers' answers on tasks. One row per answer.
+                Should contain columns 'performer', 'task', 'label'
+            performers_weights(pandas.DataFrame): Optional, frame with performers' weights. Should contain columns
+                'performer', 'weight'.
+
+        Returns:
+            pandas.DataFrame: Predicted label for each task.
+                - task - unique values from input dataset
+                - label - most likely label
+
+        Raises:
+            TypeError: If 'answers' is not a pandas.DataFrame.
+            AssertionError: If some column is missing in 'answers'
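+
+        Example:
+            A minimal usage sketch; both frames are hypothetical and only assume
+            the columns described above:
+
+            >>> mv = MajorityVote()
+            >>> labels = mv.fit_predict(answers)           # plain majority vote
+            >>> labels = mv.fit_predict(answers, weights)  # weighted majority vote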
+        """
+        self._predict_impl(answers, performers_weights)
+        return self.tasks_labels
+
+    def fit_predict_proba(self, answers: pd.DataFrame, performers_weights: Optional[pd.DataFrame] = None) -> pd.DataFrame:
+        """Calculates the probability of each label for each task.
+        If no performer chose a given label for a task, the returned frame has no
+        probability entry for that task-label pair.
+
+        Args:
+            answers(pandas.DataFrame): Frame with performers' answers on tasks. One row per answer.
+                Should contain columns 'performer', 'task', 'label'
+            performers_weights(pandas.DataFrame): Optional, frame with performers' weights. Should contain columns
+                'performer', 'weight'.
+
+        Returns:
+            pandas.DataFrame: Probabilities of each label for each task
+                - task - as dataframe index
+                - label - as dataframe columns
+                - proba - dataframe values
+
+        Raises:
+            TypeError: If 'answers' is not a pandas.DataFrame.
+            AssertionError: If some column is missing in 'answers'
+        """
+        self._predict_impl(answers, performers_weights)
+        return self.probas
+
+    def _predict_impl(self, answers: pd.DataFrame, performers_weights: Optional[pd.DataFrame] = None) -> None:
+        self._answers_base_checks(answers)
+        if performers_weights is not None:
+            if not isinstance(performers_weights, pd.DataFrame):
+                raise TypeError('"performers_weights" parameter must be of type pandas DataFrame')
+            assert 'performer' in performers_weights, 'There is no "performer" column in "performers_weights"'
+            assert 'weight' in performers_weights, 'There is no "weight" column in "performers_weights"'
+
+        if performers_weights is None:
+            # each performer contributes one vote
+            answ_scores = answers.groupby(['task', 'label'], as_index=False)['performer'].count()
+            answ_scores = answ_scores.rename(columns={'performer': 'score'})
+        else:
+            # each performer's vote counts with the performer's weight
+            answ_scores = answers.join(performers_weights.set_index('performer'), on='performer', rsuffix='_r')
+            answ_scores = answ_scores.groupby(['task', 'label'], as_index=False)['weight'].sum()
+            answ_scores = answ_scores.rename(columns={'weight': 'score'})
+
+        probas = self._calculate_probabilities(answ_scores)
+        labels = self._choose_labels(probas)
+        self._calc_performers_skills(answers, labels)
diff --git a/src/aggregation/py.typed b/src/aggregation/py.typed
new file mode 100644
index 00000000..b2589645
--- /dev/null
+++ b/src/aggregation/py.typed
@@ -0,0 +1,2 @@
+partial
+
diff --git a/src/aggregation/utils.py b/src/aggregation/utils.py
new file mode 100644
index 00000000..9a4948dd
--- /dev/null
+++ b/src/aggregation/utils.py
@@ -0,0 +1,28 @@
+from typing import Callable
+
+import pandas as pd
+import numpy as np
+
+
+def _argmax_random_ties(array: np.ndarray) -> int:
+    # Returns the index of the maximum element
+    # If there are several such elements, it returns a random one
+    return int(np.random.choice(np.flatnonzero(array == array.max())))
+
+
+def evaluate_in(row: pd.Series) -> int:
+    return int(row['label_pred'] in row['label_true'])
+
+
+def evaluate_equal(row: pd.Series) -> int:
+    return int(row['label_pred'] == row['label_true'])
+
+
+def evaluate(df_true: pd.DataFrame, df_pred: pd.DataFrame,
+             evaluate_func: Callable[[pd.Series], int] = evaluate_in) -> float:
+    df = df_true.merge(df_pred, on='task', suffixes=('_true', '_pred'))
+    assert len(df_true) == len(df), f'Dataset length mismatch, expected {len(df_true):d}, got {len(df):d}'
+
+    df['evaluation'] = df.apply(evaluate_func, axis=1)
+    return float(df['evaluation'].mean())
diff --git a/src/aggregation/wawa.py b/src/aggregation/wawa.py
new file mode 100644
index 00000000..0d5c24b5
--- /dev/null
+++ b/src/aggregation/wawa.py
@@ -0,0 +1,87 @@
+import attr
+import pandas as pd
+
+from .majority_vote import MajorityVote
+from .base_aggregator import BaseAggregator
+
+
+@attr.attrs(auto_attribs=True)
+class Wawa(BaseAggregator):
+    """
+    Worker Agreement with Aggregate
+
+    Estimates how often each performer's opinion coincides with the majority vote.
+    Based on this, for each task it computes the sum of agreements for each label.
+    The correct label is the one with the greatest sum of agreements.
+
+    After predicting, different data frames are stored (details in BaseAggregator):
+        tasks_labels: Predicted labels for each task
+        probas: Probabilities of each label for each task
+        performers_skills: Estimated skill for each performer
+    """
+
+    def fit_predict(self, answers: pd.DataFrame) -> pd.DataFrame:
+        """Predicts the correct label for each task.
+        After 'fit_predict' you can read the probabilities for all labels from the
+        'probas' field and the performers' skills from 'performers_skills'.
+
+        Args:
+            answers(pandas.DataFrame): Frame with performers' answers on tasks. One row per answer.
+                Should contain columns 'performer', 'task', 'label'
+
+        Returns:
+            pandas.DataFrame: Predicted label for each task.
+                - task - unique values from input dataset
+                - label - most likely label
+
+        Raises:
+            TypeError: If 'answers' is not a pandas.DataFrame.
+            AssertionError: If some column is missing in 'answers'
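+
+        Example:
+            A minimal usage sketch; the 'answers' frame is hypothetical and only
+            assumes the columns described above:
+
+            >>> wawa = Wawa()
+            >>> labels = wawa.fit_predict(answers)
+            >>> wawa.performers_skills  # agreement-with-majority skill per performer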
+        """
+        self._predict_impl(answers)
+        return self.tasks_labels
+
+    def fit_predict_proba(self, answers: pd.DataFrame) -> pd.DataFrame:
+        """Calculates the probability of each label for each task.
+        If no performer chose a given label for a task, the returned frame has no
+        probability entry for that task-label pair.
+        After 'fit_predict_proba' you can read the predicted labels from the
+        'tasks_labels' field and the performers' skills from 'performers_skills'.
+
+        Args:
+            answers(pandas.DataFrame): Frame with performers' answers on tasks. One row per answer.
+                Should contain columns 'performer', 'task', 'label'
+
+        Returns:
+            pandas.DataFrame: Probabilities for each label.
+                - task - as dataframe index
+                - label - as dataframe columns
+                - proba - dataframe values
+
+        Raises:
+            TypeError: If 'answers' is not a pandas.DataFrame.
+            AssertionError: If some column is missing in 'answers'
+        """
+        self._predict_impl(answers)
+        return self.probas
+
+    def _predict_impl(self, answers: pd.DataFrame) -> None:
+        self._answers_base_checks(answers)
+
+        # calculating performers' skills as agreement with the Majority Vote labels
+        mv_aggregation = MajorityVote()
+        mv_aggregation.fit_predict(answers)
+        self.performers_skills = mv_aggregation.performers_skills
+
+        # join labels and skills
+        labels_probas = answers.merge(self.performers_skills, on='performer')
+        labels_probas = (
+            labels_probas
+            .groupby(['task', 'label'])
+            .agg({'skill': 'sum'})
+            .reset_index()
+            .rename(columns={'skill': 'score'})
+        )
+
+        labels_probas = self._calculate_probabilities(labels_probas)
+        self._choose_labels(labels_probas)
diff --git a/src/aggregation/zero_based_skill.py b/src/aggregation/zero_based_skill.py
new file mode 100644
index 00000000..d3a8364a
--- /dev/null
+++ b/src/aggregation/zero_based_skill.py
@@ -0,0 +1,229 @@
+import attr
+import numpy as np
+import pandas as pd
+from typing import Any, Optional, Set
+
+from .base_aggregator import BaseAggregator
+from .majority_vote import MajorityVote
+
+
+@attr.attrs(auto_attribs=True)
+class LRScheduler:
+
+    steps_to_reduce: int = 20
+    reduce_rate: float = 2.0
+    _lr: Optional[float] = None
+    _steps: int = 1
+
+    def step(self) -> float:
+        if self._lr is None:
+            raise AssertionError('step called before setting learning rate')
+        # reduce the learning rate every 'steps_to_reduce' steps
+        if self._steps % self.steps_to_reduce == 0:
+            self._lr /= self.reduce_rate
+        self._steps += 1
+        return self._lr
+
+    def reset(self) -> None:
+        self._steps = 1
+
+
+@attr.attrs(auto_attribs=True)
+class ZeroBasedSkill(BaseAggregator):
+    """The Zero-Based Skill aggregation model
+
+    Performs weighted majority voting on tasks. After processing a pool of tasks,
+    re-estimates the performers' skills according to the correctness of their answers.
+    Repeats this process until the labels stop changing or the iteration limit is reached.
+
+    All performers in a dataset passed to 'predict' must also be present in the
+    dataset that was passed to 'fit'.
+
+    After 'fit', the estimated skills are stored in 'performers_skills'.
+
+    After predicting, different data frames are stored (details in BaseAggregator):
+        tasks_labels: Predicted labels for each task
+        probas: Probabilities of each label for each task
+    """
+
+    lr: float = 1e-1
+    n_iter: int = 100
+    performers_skills: Optional[pd.DataFrame] = None
+    labels_set: Optional[Set[Any]] = None
+    num_labels: Optional[int] = None
+    early_stopping: int = 3
+    eps: float = 1e-5
+    lr_scheduler: LRScheduler = attr.Factory(LRScheduler)  # a fresh scheduler per instance
+
+    def fit(self, answers: pd.DataFrame) -> 'ZeroBasedSkill':
+        """Calculates the skill of each performer based on the provided answers.
+        The calculated skills are stored in the instance and can be read from the
+        'performers_skills' field after 'fit'.
+
+        Args:
+            answers(pandas.DataFrame): Frame containing performers' answers. One row per answer.
+                Should contain columns 'performer', 'task', 'label'.
+        Returns:
+            ZeroBasedSkill: self, to allow chaining further calls
+
+        Raises:
+            TypeError: If the input dataset is not a pandas.DataFrame.
+            AssertionError: If some column is missing in 'answers'.
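+
+        Example:
+            A minimal usage sketch; the 'answers' frame is hypothetical and only
+            assumes the columns described above:
+
+            >>> zbs = ZeroBasedSkill(n_iter=50)
+            >>> zbs.fit(answers)
+            >>> zbs.performers_skills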
+ """ + self._answers_base_checks(answers) + self._fit_impl(answers) + return self + + def predict(self, answers: pd.DataFrame) -> pd.DataFrame: + """Predict correct labels for tasks. Using calculated performers skill, stored in self instance. + After 'predict' you can get probabilities for all labels from class field 'probas'. + + Args: + answers(pandas.DataFrame): Frame with performers answers on task. One row per answer. + Should contain columns 'performer', 'task', 'label' + + Returns: + pandas.DataFrame: Predicted label for each task. + - task - unique values from input dataset + - label - most likely label + + Raises: + TypeError: If answers don't has pandas.DataFrame type + AssertionError: If there is some collumn missing in 'answers'. + Or when 'predict' called without 'fit'. + Or if there are new performers in 'answer' that were not in 'answers' in 'fit'. + """ + self._answers_base_checks(answers) + self._predict_impl(answers) + return self.tasks_labels + + def predict_proba(self, answers) -> pd.DataFrame: + """Calculates Probabilities for each label of task. + After 'predict_proba' you can get predicted labels from class field 'tasks_labels'. + + Args: + answers(pandas.DataFrame): Frame with performers answers on task. One row per answer. + Should contain columns 'performer', 'task', 'label' + + Returns: + pandas.DataFrame: Scores for each task and the likelihood of correctness. + - task - as dataframe index + - label - as dataframe columns + - proba - dataframe values + + Raises: + TypeError: If answers don't has pandas.DataFrame type + AssertionError: If there is some collumn missing in 'answers'. + Or when 'predict' called without 'fit'. + Or if there are new performers in 'answer' that were not in 'answers' in 'fit'. + """ + self._answers_base_checks(answers) + self._predict_impl(answers) + return self.probas + + def fit_predict(self, answers: pd.DataFrame) -> pd.DataFrame: + """Performes 'fit' and 'predict' in one call. + + Args: + answers(pandas.DataFrame): Frame with performers answers on task. One row per answer. + Should contain columns 'performer', 'task', 'label' + + Returns: + pandas.DataFrame: Predicted label for each task. + - task - unique values from input dataset + - label - most likely label + + Raises: + TypeError: If answers don't has pandas.DataFrame type + AssertionError: If there is some collumn missing in 'answers'. + """ + self._answers_base_checks(answers) + self._fit_impl(answers) + self._predict_impl(answers) + return self.tasks_labels + + def fit_predict_proba(self, answers: pd.DataFrame) -> pd.DataFrame: + """Performes 'fit' and 'predict_proba' in one call. + + Args: + answers(pandas.DataFrame): Frame with performers answers on task. One row per answer. + Should contain columns 'performer', 'task', 'label' + + Returns: + pandas.DataFrame: Scores for each task and the likelihood of correctness. + - task - as dataframe index + - label - as dataframe columns + - proba - dataframe values + + Raises: + TypeError: If answers don't has pandas.DataFrame type + AssertionError: If there is some collumn missing in 'answers'. 
+ """ + self._answers_base_checks(answers) + self._fit_impl(answers) + self._predict_impl(answers) + return self.probas + + def _predict_impl(self, answers: pd.DataFrame) -> pd.DataFrame: + self._init_by_input(answers) + + weighted_mv = MajorityVote() + labels = weighted_mv.fit_predict(answers, self.performers_skills.rename(columns={'skill': 'weight'})) + self.tasks_labels = labels + self.probas = weighted_mv.probas + return self.tasks_labels + + def _init_by_input(self, answers: pd.DataFrame) -> None: + if not self.labels_set: + self.labels_set = set(answers['label']) + self.num_labels = len(self.labels_set) + + if self.performers_skills is None: + self.performers_skills = pd.DataFrame( + { + 'performer': answers.performer.unique(), + 'skill': 1 / self.num_labels + self.eps, + } + ) + else: + new_performers_index = pd.Index(answers.performer, name='performer').difference(self.performers_skills.performer) + new_performers_skills = pd.DataFrame({'skill': 1 / self.num_labels + self.eps}, index=new_performers_index) + self.performers_skills = pd.concat([self.performers_skills.set_index('performer'), new_performers_skills], copy=False).reset_index() + + self.tasks_labels = pd.DataFrame({ + 'task': answers.task.unique(), + 'label': np.NaN, + }) + + def _fit_impl(self, answers: pd.DataFrame) -> None: + self._init_by_input(answers) + + self.lr_scheduler._lr = self.lr + no_change = 0 + for _ in range(self.n_iter): + labels_changed = self._train_iter(answers) + self.lr = self.lr_scheduler.step() + if not labels_changed: + no_change += 1 + else: + no_change = 0 + if no_change == self.early_stopping: + break + self._calc_performers_skills(answers, self.tasks_labels) + self.lr_scheduler.reset() + + def _train_iter(self, answers: pd.DataFrame) -> bool: + prev_labels = self.tasks_labels.copy() + weighted_mv = MajorityVote() + labels = weighted_mv.fit_predict(answers, self.performers_skills.rename(columns={'skill': 'weight'})) + self.tasks_labels = labels + self.probas = weighted_mv.probas + + labels_changed = not prev_labels.set_index('task')['label'].equals(labels.set_index('task')['label']) + + prev_skills = self.performers_skills.copy().set_index('performer') + self._calc_performers_skills(answers, labels) + self.performers_skills = self.performers_skills.set_index('performer') + + self.performers_skills = prev_skills + self.lr * (self.performers_skills - prev_skills) + self.performers_skills = self.performers_skills.reset_index() + + return labels_changed diff --git a/src/metrics/__init__.py b/src/metrics/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/metrics/data/__init__.py b/src/metrics/data/__init__.py new file mode 100644 index 00000000..6ae6787a --- /dev/null +++ b/src/metrics/data/__init__.py @@ -0,0 +1,3 @@ +from ._classification import consistency + +__all__ = ['consistency'] diff --git a/src/metrics/data/_classification.py b/src/metrics/data/_classification.py new file mode 100644 index 00000000..01effbc3 --- /dev/null +++ b/src/metrics/data/_classification.py @@ -0,0 +1,66 @@ +from typing import Any, Optional, Union +import pandas as pd + +from crowdkit.aggregation.base_aggregator import BaseAggregator +from crowdkit.aggregation import MajorityVote + + +def _check_answers(answers: pd.DataFrame) -> None: + if not isinstance(answers, pd.DataFrame): + raise TypeError('Working only with pandas DataFrame') + assert 'task' in answers, 'There is no "task" column in answers' + assert 'performer' in answers, 'There is no "performer" column in answers' + assert 'label' 
+    assert 'label' in answers, 'There is no "label" column in answers'
+
+
+def _label_probability(row: pd.Series, label: Any, n_labels: int) -> float:
+    """Numerator in the Bayes formula"""
+    return row['skill'] if row['label'] == label else (1.0 - row['skill']) / (n_labels - 1)
+
+
+def _task_consistency(row: pd.Series) -> float:
+    """Posterior probability for a single task"""
+    return row[row['aggregated_label']] / row['denominator'] if row['denominator'] != 0 else 0.0
+
+
+def consistency(answers: pd.DataFrame,
+                performers_skills: Optional[pd.Series] = None,
+                aggregator: BaseAggregator = MajorityVote(),
+                by_task: bool = False) -> Union[float, pd.Series]:
+    """
+    Consistency metric: posterior probability of the aggregated label given the performers'
+    skills, calculated using the standard Dawid-Skene model.
+    Args:
+        answers (pandas.DataFrame): A data frame containing `task`, `performer` and `label` columns.
+        performers_skills (Optional[pandas.Series]): performers' skills, e.g. golden set skills. If not provided,
+            uses the aggregator's `performers_skills` attribute.
+        aggregator (aggregation.BaseAggregator): aggregation method, default: MajorityVote
+        by_task (bool): if set, returns consistencies for every task in the provided data frame.
+
+    Returns:
+        Union[float, pd.Series]
+    """
+    _check_answers(answers)
+    aggregated = aggregator.fit_predict(answers)
+    if performers_skills is None:
+        if hasattr(aggregator, 'performers_skills'):
+            performers_skills = aggregator.performers_skills.set_index('performer')['skill']
+        else:
+            raise AssertionError('This aggregator is not supported. Please provide performers skills.')
+
+    answers = answers.copy(deep=False)
+    answers = answers.set_index('performer')
+    answers['skill'] = performers_skills
+    answers.reset_index(inplace=True)
+    labels = pd.unique(answers.label)
+    for label in labels:
+        answers[label] = answers.apply(lambda row: _label_probability(row, label, len(labels)), axis=1)
+    labels_proba = answers.groupby('task').prod()
+    labels_proba['aggregated_label'] = aggregated.set_index('task')['label']
+    labels_proba['denominator'] = labels_proba[list(labels)].sum(axis=1)
+    consistencies = labels_proba.apply(_task_consistency, axis=1)
+
+    if by_task:
+        return consistencies
+    else:
+        return consistencies.mean()
diff --git a/src/metrics/performers/__init__.py b/src/metrics/performers/__init__.py
new file mode 100644
index 00000000..8b9acd5b
--- /dev/null
+++ b/src/metrics/performers/__init__.py
@@ -0,0 +1,4 @@
+from .golden_set_accuracy import golden_set_accuracy
+from .accuracy_on_aggregates import accuracy_on_aggregates
+
+__all__ = ['golden_set_accuracy', 'accuracy_on_aggregates']
diff --git a/src/metrics/performers/accuracy_on_aggregates.py b/src/metrics/performers/accuracy_on_aggregates.py
new file mode 100644
index 00000000..8d810482
--- /dev/null
+++ b/src/metrics/performers/accuracy_on_aggregates.py
@@ -0,0 +1,34 @@
+from typing import Any, Callable, Optional, Union
+import pandas as pd
+
+from crowdkit.aggregation.base_aggregator import BaseAggregator
+from crowdkit.aggregation import MajorityVote
+from .golden_set_accuracy import golden_set_accuracy
+
+
+def accuracy_on_aggregates(answers: pd.DataFrame,
+                           aggregator: Optional[BaseAggregator] = MajorityVote(),
+                           aggregates: Optional[pd.Series] = None,
+                           by_performer: bool = False,
+                           answer_column: Any = 'label',
+                           compare_function: Optional[Callable[[Any, Any], float]] = None) -> Union[float, pd.Series]:
+    """
+    Accuracy on aggregates: the fraction of a performer's answers that match the aggregated ones.
+    Args:
+        answers (pandas.DataFrame): a data frame containing `task`, `performer` and `label` columns.
+        aggregator (aggregation.BaseAggregator): aggregation algorithm. default: MajorityVote
+        aggregates (Optional[pandas.Series]): aggregated answers for the provided tasks.
+        by_performer (bool): if set, returns accuracies for every performer in the provided data frame. Otherwise,
+            returns the average accuracy of all performers.
+        answer_column: column in the data frame that contains performers' answers.
+        compare_function (Optional[Callable[[Any, Any], float]]): function that compares a performer's answer with
+            the golden answer. If `None`, uses the `==` operator.
+
+    Returns:
+        Union[float, pd.Series]
+    """
+    if aggregates is None and aggregator is None:
+        raise AssertionError('Either aggregator or aggregates must be provided')
+    if aggregates is None:
+        aggregates = aggregator.fit_predict(answers).set_index('task')[answer_column]
+    return golden_set_accuracy(answers, aggregates, by_performer, answer_column, compare_function)
diff --git a/src/metrics/performers/golden_set_accuracy.py b/src/metrics/performers/golden_set_accuracy.py
new file mode 100644
index 00000000..26d621ae
--- /dev/null
+++ b/src/metrics/performers/golden_set_accuracy.py
@@ -0,0 +1,36 @@
+from typing import Any, Callable, Optional, Union
+import pandas as pd
+
+
+def golden_set_accuracy(answers: pd.DataFrame,
+                        golden_answers: pd.Series,
+                        by_performer: bool = False,
+                        answer_column: Any = 'label',
+                        compare_function: Optional[Callable[[Any, Any], float]] = None) -> Union[float, pd.Series]:
+    """
+    Golden set accuracy metric: the fraction of a performer's correct answers on golden tasks.
+    Args:
+        answers (pandas.DataFrame): A data frame containing `task`, `performer` and `label` columns.
+        golden_answers (pandas.Series): ground-truth answers for golden tasks.
+        by_performer (bool): if set, returns accuracies for every performer in the provided data frame. Otherwise,
+            returns the average accuracy of all performers.
+        answer_column: column in the data frame that contains performers' answers.
+        compare_function (Optional[Callable[[Any, Any], float]]): function that compares a performer's answer with
+            the golden answer. If `None`, uses the `==` operator.
+
+    Returns:
+        Union[float, pd.Series]
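+
+    Example:
+        A minimal usage sketch; both inputs are hypothetical, with 'golden'
+        mapping a subset of task ids to their ground-truth labels:
+
+        >>> golden = golden_df.set_index('task')['label']
+        >>> golden_set_accuracy(answers, golden, by_performer=True)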
+    """
+    answers = answers.copy(deep=False)
+    answers.set_index('task', inplace=True)
+    answers['golden'] = golden_answers
+    answers = answers[answers.golden.notna()]
+    if compare_function is None:
+        answers['skill'] = answers[answer_column] == answers['golden']
+    else:
+        answers['skill'] = answers.apply(lambda row: compare_function(row[answer_column], row['golden']), axis=1)
+
+    if by_performer:
+        performers_skills = answers.groupby('performer')['skill'].sum()
+        return performers_skills / answers.groupby('performer')['label'].count()
+    else:
+        return answers['skill'].mean()
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/aggregation/__init__.py b/tests/aggregation/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/aggregation/data_gold_mv.py b/tests/aggregation/data_gold_mv.py
new file mode 100644
index 00000000..eda064ad
--- /dev/null
+++ b/tests/aggregation/data_gold_mv.py
@@ -0,0 +1,130 @@
+import pytest
+import pandas as pd
+import numpy as np
+
+
+# Gold Majority vote on toy YSDA
+
+@pytest.fixture
+def toy_labels_result_gold(toy_ground_truth_df):
+    return toy_ground_truth_df
+
+
+@pytest.fixture
+def toy_skills_result_gold():
+    return pd.DataFrame(
+        [
+            ['w1', 0.5],
+            ['w2', 1.0],
+            ['w3', 1.0],
+            ['w4', 0.5],
+            ['w5', 0.0],
+        ],
+        columns=['performer', 'skill']
+    )
+
+
+@pytest.fixture
+def toy_probas_result_gold():
+    result_df = pd.DataFrame(
+        [
+            [0.750000, 0.250000],
+            [0.833333, 0.166667],
+            [0.333333, 0.666667],
+            [1.0, np.NaN],
+            [0.166667, 0.833333],
+        ],
+        columns=['yes', 'no'],
+        index=['t1', 't2', 't3', 't4', 't5'],
+    )
+    result_df.index.name = 'task'
+    result_df.columns.name = 'label'
+    return result_df
+
+
+@pytest.fixture
+def toy_answers_on_gold_df_cannot_fit():
+    # When we have this dataset and 'toy_gold_df', we are trying to calculate the skills of the performers,
+    # and we cannot do it for some performers
+    return pd.DataFrame(
+        [
+            ['w1', 't1', 'no'],
+            ['w2', 't2', 'yes'],
+            ['w3', 't1', 'yes'],
+            ['w4', 't2', 'yes'],
+            ['w5', 't5', 'yes'],  # 'w5' answer, but 't5' not in 'toy_gold_df'
+        ],
+        columns=['performer', 'task', 'label']
+    )
+
+
+@pytest.fixture
+def toy_answers_on_gold_df_cannot_predict():
+    # When we have this dataset in 'fit' and the standard 'toy_answers_df' in 'predict', we cannot predict
+    # labels or probas, because this dataset doesn't contain all performers from 'toy_answers_df'
+    return pd.DataFrame(
+        [
+            ['w1', 't1', 'no'],
+            ['w2', 't2', 'yes'],
+            ['w3', 't1', 'yes'],
+            ['w4', 't2', 'yes'],
+            # ['w5', 't5', 'yes'],  # 'w5' missing here, but exists in 'toy_answers_df'
+        ],
+        columns=['performer', 'task', 'label']
+    )
+
+
+# Gold Majority vote on simple
+
+@pytest.fixture
+def simple_labels_result_gold(simple_ground_truth_df):
+    return simple_ground_truth_df
+
+
+@pytest.fixture
+def simple_skills_result_gold():
+    return pd.DataFrame(
+        [
+            ['0c3eb7d5fcc414db137c4180a654c06e', 0.5],
+            ['0f65edea0a6dc7b9acba1dea313bbb3d', 1.0],
+            ['a452e450f913cfa987cad58d50393718', 1.0],
+            ['b17c3301ad2ccbb798716fdd405d16e8', 1.0],
+            ['bde3b214b06c1efa6cb1bc6284dc72d2', 1.0],
+            ['e563e2fb32fce9f00123a65a1bc78c55', 0.5],
+        ],
+        columns=['performer', 'skill']
+    )
+
+
+@pytest.fixture
+def simple_probas_result_gold():
+    result_df = pd.DataFrame(
+        [
+            [0.800000, np.NaN, 0.200000],
+            [0.857143, 0.142857, np.NaN],
+            [np.NaN, 0.857143, 0.142857],
+            [np.NaN, np.NaN, 1.000000],
+            [0.500000, 0.250000, 0.250000],
+            [np.NaN, np.NaN,
1.000000], + [np.NaN, 1.000000, np.NaN], + [np.NaN, 0.800000, 0.200000], + [np.NaN, np.NaN, 1.000000], + [np.NaN, np.NaN, 1.000000], + ], + columns=['chicken', 'goose', 'parrot'], + index=[ + '1231239876--5fac0d234ffb2f3b00893ee4', + '1231239876--5fac0d234ffb2f3b00893ee8', + '1231239876--5fac0d234ffb2f3b00893eec', + '1231239876--5fac0d234ffb2f3b00893efb', + '1231239876--5fac0d234ffb2f3b00893efd', + '1231239876--5fac0d234ffb2f3b00893f02', + '1231239876--5fac0d234ffb2f3b00893f03', + '1231239876--5fac0d234ffb2f3b00893f05', + '1231239876--5fac0d234ffb2f3b00893f07', + '1231239876--5fac0d234ffb2f3b00893f08', + ], + ) + result_df.index.name = 'task' + result_df.columns.name = 'label' + return result_df diff --git a/tests/aggregation/data_hrrasa.py b/tests/aggregation/data_hrrasa.py new file mode 100644 index 00000000..9c6388a7 --- /dev/null +++ b/tests/aggregation/data_hrrasa.py @@ -0,0 +1,30 @@ +import pytest +import pandas as pd + + +@pytest.fixture +def simple_text_result_hrrasa(): + return pd.Series( + [ + 'i shouuld had seen you damp my word', + 'it must indeed be allowed that is structure and sentences is expanded and often has somewhat of the inversion of latin and that he delighted to express familiar thought in a philosophical language meaning this the works of socrates who it was said reduced philosophy to the simplicity of life ', # noqa + 'nature discovers are confusing to us pain is holding the same emotions and grimaces of the face that serve for weeping serve for laughter too and indeed before the one and the other be finished do but observe the painter\'s manner of handling and you will be in doubt to which of the two the design tems and the extreme laghter does it last to bring tears', # noqa + 'they is more than one amongs us who would like to immitate them i think', + 'its all nonsense ned cried chris for them to think that they\'re staying on account of us hello chris are you listening' + ], + index=['1255-74899-0020', '1651-136854-0030', '7601-101619-0003', '7601-175351-0021', '8254-84205-0005'] + ) + + +@pytest.fixture +def simple_text_result_rasa(): + return pd.Series( + [ + 'i shouuld had seen you damp my word', + 'it must indeed be allowed that is structure and sentences is expanded and often has somewhat of the inversion of latin and that he delighted to express familiar thought in a philosophical language meaning this the works of socrates who it was said reduced philosophy to the simplicity of life ', # noqa + 'nature discovers are confusing to us pain is holding the same emotions and grimaces of the face that serve for weeping serve for laughter too and indeed before the one and the other be finished do but observe the painter\'s manner of handling and you will be in doubt to which of the two the design tems and the extreme laghter does it last to bring tears', # noqa + 'they is more than one amongs us who would like to immitate them i think', + 'its all nonsense ned cried chris for them to think that they\'re staying on account of us hello chris are you listening' + ], + index=['1255-74899-0020', '1651-136854-0030', '7601-101619-0003', '7601-175351-0021', '8254-84205-0005'] + ) diff --git a/tests/aggregation/data_mmsr.py b/tests/aggregation/data_mmsr.py new file mode 100644 index 00000000..2c33a307 --- /dev/null +++ b/tests/aggregation/data_mmsr.py @@ -0,0 +1,105 @@ +import pytest +import pandas as pd +import numpy as np + + +# Wawa on toy YSDA + +@pytest.fixture +def toy_labels_result_mmsr(): + return pd.DataFrame( + [ + ['t1', 'yes'], + ['t2', 'yes'], + ['t3', 
'no'], + ['t4', 'yes'], + ['t5', 'no'], + ], + columns=['task', 'label'] + ) + + +@pytest.fixture +def toy_skills_result_mmsr(): + return pd.DataFrame( + [ + ['w1', 0.328452], + ['w2', 0.776393], + ['w3', 0.759235], + ['w4', 0.671548], + ['w5', 0.776393], + ], + columns=['performer', 'skill'] + ) + + +@pytest.fixture +def toy_probas_result_mmsr(): + result_df = pd.DataFrame( + [ + [0.473821, 0.526179], + [0.414814, 0.585186], + [0.639077, 0.360923], + [np.NaN, 1.000000], + [0.840177, 0.159823], + ], + columns=['no', 'yes'], + index=['t1', 't2', 't3', 't4', 't5'], + ) + result_df.index.name = 'task' + result_df.columns.name = 'label' + return result_df + + +@pytest.fixture +def simple_labels_result_mmsr(simple_ground_truth_df): + return simple_ground_truth_df + + +@pytest.fixture +def simple_skills_result_mmsr(): + return pd.DataFrame( + [ + ['0c3eb7d5fcc414db137c4180a654c06e', 0.210819], + ['0f65edea0a6dc7b9acba1dea313bbb3d', 0.789181], + ['a452e450f913cfa987cad58d50393718', 0.789181], + ['b17c3301ad2ccbb798716fdd405d16e8', 0.789181], + ['bde3b214b06c1efa6cb1bc6284dc72d2', 0.789181], + ['e563e2fb32fce9f00123a65a1bc78c55', 0.779799], + ], + columns=['performer', 'skill'] + ) + + +@pytest.fixture +def simple_probas_result_mmsr(): + result_df = pd.DataFrame( + [ + [0.783892, np.NaN, 0.216108], + [0.751367, 0.248633, np.NaN], + [np.NaN, 0.844744, 0.155256], + [np.NaN, np.NaN, 1.000000], + [0.393067, 0.390206, 0.216726], + [np.NaN, np.NaN, 1.000000], + [np.NaN, 1.000000, np.NaN], + [np.NaN, 0.783892, 0.216108], + [np.NaN, np.NaN, 1.000000], + [np.NaN, np.NaN, 1.000000], + ], + columns=['chicken', 'goose', 'parrot'], + index=[ + '1231239876--5fac0d234ffb2f3b00893ee4', + '1231239876--5fac0d234ffb2f3b00893ee8', + '1231239876--5fac0d234ffb2f3b00893eec', + '1231239876--5fac0d234ffb2f3b00893efb', + '1231239876--5fac0d234ffb2f3b00893efd', + '1231239876--5fac0d234ffb2f3b00893f02', + '1231239876--5fac0d234ffb2f3b00893f03', + '1231239876--5fac0d234ffb2f3b00893f05', + '1231239876--5fac0d234ffb2f3b00893f07', + '1231239876--5fac0d234ffb2f3b00893f08', + ], + ) + result_df.index.name = 'task' + result_df.columns.name = 'label' + return result_df diff --git a/tests/aggregation/data_mv.py b/tests/aggregation/data_mv.py new file mode 100644 index 00000000..9c2a814f --- /dev/null +++ b/tests/aggregation/data_mv.py @@ -0,0 +1,107 @@ +import pytest +import pandas as pd +import numpy as np + + +# Majority vote on toy YSDA + +@pytest.fixture +def toy_labels_result_mv(): + return pd.DataFrame( + [ + ['t1', 'no'], + ['t2', 'yes'], + ['t3', 'no'], + ['t4', 'yes'], + ['t5', 'no'], + ], + columns=['task', 'label'] + ) + + +@pytest.fixture +def toy_skills_result_mv(): + return pd.DataFrame( + [ + ['w1', 0.6], + ['w2', 0.8], + ['w3', 1.0], + ['w4', 0.4], + ['w5', 0.8], + ], + columns=['performer', 'skill'] + ) + + +@pytest.fixture +def toy_probas_result_mv(): + result_df = pd.DataFrame( + [ + [0.5, 0.5], + [0.6, 0.4], + [0.4, 0.6], + [1.0, np.NaN], + [0.2, 0.8], + ], + columns=['yes', 'no'], + index=['t1', 't2', 't3', 't4', 't5'], + ) + result_df.index.name = 'task' + result_df.columns.name = 'label' + return result_df + + +# Majority vote on simple + +@pytest.fixture +def simple_labels_result_mv(simple_ground_truth_df): + return simple_ground_truth_df + + +@pytest.fixture +def simple_skills_result_mv(): + return pd.DataFrame( + [ + ['0c3eb7d5fcc414db137c4180a654c06e', 0.333333], + ['0f65edea0a6dc7b9acba1dea313bbb3d', 1.000000], + ['a452e450f913cfa987cad58d50393718', 1.000000], + ['b17c3301ad2ccbb798716fdd405d16e8', 
1.000000], + ['bde3b214b06c1efa6cb1bc6284dc72d2', 1.000000], + ['e563e2fb32fce9f00123a65a1bc78c55', 0.666667], + ], + columns=['performer', 'skill'] + ) + + +@pytest.fixture +def simple_probas_result_mv(): + result_df = pd.DataFrame( + [ + [0.666667, np.NaN, 0.333333], + [0.750000, 0.250000, np.NaN], + [np.NaN, 0.750000, 0.250000], + [np.NaN, np.NaN, 1.000000], + [0.333333, 0.333333, 0.333333], + [np.NaN, np.NaN, 1.000000], + [np.NaN, 1.000000, np.NaN], + [np.NaN, 0.666667, 0.333333], + [np.NaN, np.NaN, 1.000000], + [np.NaN, np.NaN, 1.000000], + ], + columns=['chicken', 'goose', 'parrot'], + index=[ + '1231239876--5fac0d234ffb2f3b00893ee4', + '1231239876--5fac0d234ffb2f3b00893ee8', + '1231239876--5fac0d234ffb2f3b00893eec', + '1231239876--5fac0d234ffb2f3b00893efb', + '1231239876--5fac0d234ffb2f3b00893efd', + '1231239876--5fac0d234ffb2f3b00893f02', + '1231239876--5fac0d234ffb2f3b00893f03', + '1231239876--5fac0d234ffb2f3b00893f05', + '1231239876--5fac0d234ffb2f3b00893f07', + '1231239876--5fac0d234ffb2f3b00893f08', + ], + ) + result_df.index.name = 'task' + result_df.columns.name = 'label' + return result_df diff --git a/tests/aggregation/data_wawa.py b/tests/aggregation/data_wawa.py new file mode 100644 index 00000000..a4d174e4 --- /dev/null +++ b/tests/aggregation/data_wawa.py @@ -0,0 +1,107 @@ +import pytest +import pandas as pd +import numpy as np + + +# Wawa on toy YSDA + +@pytest.fixture +def toy_labels_result_wawa(): + return pd.DataFrame( + [ + ['t1', 'no'], + ['t2', 'yes'], + ['t3', 'no'], + ['t4', 'yes'], + ['t5', 'no'], + ], + columns=['task', 'label'] + ) + + +@pytest.fixture +def toy_skills_result_wawa(): + return pd.DataFrame( + [ + ['w1', 0.6], + ['w2', 0.8], + ['w3', 1.0], + ['w4', 0.4], + ['w5', 0.8], + ], + columns=['performer', 'skill'] + ) + + +@pytest.fixture +def toy_probas_result_wawa(): + result_df = pd.DataFrame( + [ + [0.461538, 0.538462], + [0.666667, 0.333333], + [0.277778, 0.722222], + [1.0, np.NaN], + [0.166667, 0.833333], + ], + columns=['yes', 'no'], + index=['t1', 't2', 't3', 't4', 't5'], + ) + result_df.index.name = 'task' + result_df.columns.name = 'label' + return result_df + + +# Wawa on simple + +@pytest.fixture +def simple_labels_result_wawa(simple_ground_truth_df): + return simple_ground_truth_df + + +@pytest.fixture +def simple_skills_result_wawa(): + return pd.DataFrame( + [ + ['0c3eb7d5fcc414db137c4180a654c06e', 0.333333], + ['0f65edea0a6dc7b9acba1dea313bbb3d', 1.000000], + ['a452e450f913cfa987cad58d50393718', 1.000000], + ['b17c3301ad2ccbb798716fdd405d16e8', 1.000000], + ['bde3b214b06c1efa6cb1bc6284dc72d2', 1.000000], + ['e563e2fb32fce9f00123a65a1bc78c55', 0.666667], + ], + columns=['performer', 'skill'] + ) + + +@pytest.fixture +def simple_probas_result_wawa(): + result_df = pd.DataFrame( + [ + [0.857143, np.NaN, 0.142857], + [0.818182, 0.181818, np.NaN], + [np.NaN, 0.900000, 0.100000], + [np.NaN, np.NaN, 1.000000], + [0.500000, 0.333333, 0.166667], + [np.NaN, np.NaN, 1.000000], + [np.NaN, 1.000000, np.NaN], + [np.NaN, 0.857143, 0.142857], + [np.NaN, np.NaN, 1.000000], + [np.NaN, np.NaN, 1.000000], + ], + columns=['chicken', 'goose', 'parrot'], + index=[ + '1231239876--5fac0d234ffb2f3b00893ee4', + '1231239876--5fac0d234ffb2f3b00893ee8', + '1231239876--5fac0d234ffb2f3b00893eec', + '1231239876--5fac0d234ffb2f3b00893efb', + '1231239876--5fac0d234ffb2f3b00893efd', + '1231239876--5fac0d234ffb2f3b00893f02', + '1231239876--5fac0d234ffb2f3b00893f03', + '1231239876--5fac0d234ffb2f3b00893f05', + '1231239876--5fac0d234ffb2f3b00893f07', + 
'1231239876--5fac0d234ffb2f3b00893f08', + ], + ) + result_df.index.name = 'task' + result_df.columns.name = 'label' + return result_df diff --git a/tests/aggregation/data_zbs.py b/tests/aggregation/data_zbs.py new file mode 100644 index 00000000..00ead340 --- /dev/null +++ b/tests/aggregation/data_zbs.py @@ -0,0 +1,105 @@ +import pytest +import pandas as pd +import numpy as np + + +# Wawa on toy YSDA + +@pytest.fixture +def toy_labels_result_zbs(): + return pd.DataFrame( + [ + ['t1', 'no'], + ['t2', 'yes'], + ['t3', 'no'], + ['t4', 'yes'], + ['t5', 'no'], + ], + columns=['task', 'label'] + ) + + +@pytest.fixture +def toy_skills_result_zbs(): + return pd.DataFrame( + [ + ['w1', 0.6], + ['w2', 0.8], + ['w3', 1.0], + ['w4', 0.4], + ['w5', 0.8], + ], + columns=['performer', 'skill'] + ) + + +@pytest.fixture +def toy_probas_result_zbs(): + result_df = pd.DataFrame( + [ + [0.538462, 0.461538], + [0.333333, 0.666667], + [0.722222, 0.277778], + [np.NaN, 1.000000], + [0.833333, 0.166667], + ], + columns=['no', 'yes'], + index=['t1', 't2', 't3', 't4', 't5'], + ) + result_df.index.name = 'task' + result_df.columns.name = 'label' + return result_df + + +@pytest.fixture +def simple_labels_result_zbs(simple_ground_truth_df): + return simple_ground_truth_df + + +@pytest.fixture +def simple_skills_result_zbs(): + return pd.DataFrame( + [ + ['0c3eb7d5fcc414db137c4180a654c06e', 0.333333], + ['0f65edea0a6dc7b9acba1dea313bbb3d', 1.000000], + ['a452e450f913cfa987cad58d50393718', 1.000000], + ['b17c3301ad2ccbb798716fdd405d16e8', 1.000000], + ['bde3b214b06c1efa6cb1bc6284dc72d2', 1.000000], + ['e563e2fb32fce9f00123a65a1bc78c55', 0.666667], + ], + columns=['performer', 'skill'] + ) + + +@pytest.fixture +def simple_probas_result_zbs(): + result_df = pd.DataFrame( + [ + [0.857143, np.NaN, 0.142857], + [0.818182, 0.181818, np.NaN], + [np.NaN, 0.900000, 0.100000], + [np.NaN, np.NaN, 1.000000], + [0.500000, 0.333333, 0.166667], + [np.NaN, np.NaN, 1.000000], + [np.NaN, 1.000000, np.NaN], + [np.NaN, 0.857143, 0.142857], + [np.NaN, np.NaN, 1.000000], + [np.NaN, np.NaN, 1.000000], + ], + columns=['chicken', 'goose', 'parrot'], + index=[ + '1231239876--5fac0d234ffb2f3b00893ee4', + '1231239876--5fac0d234ffb2f3b00893ee8', + '1231239876--5fac0d234ffb2f3b00893eec', + '1231239876--5fac0d234ffb2f3b00893efb', + '1231239876--5fac0d234ffb2f3b00893efd', + '1231239876--5fac0d234ffb2f3b00893f02', + '1231239876--5fac0d234ffb2f3b00893f03', + '1231239876--5fac0d234ffb2f3b00893f05', + '1231239876--5fac0d234ffb2f3b00893f07', + '1231239876--5fac0d234ffb2f3b00893f08', + ], + ) + result_df.index.name = 'task' + result_df.columns.name = 'label' + return result_df diff --git a/tests/aggregation/test_aggregation.py b/tests/aggregation/test_aggregation.py new file mode 100644 index 00000000..33f597f1 --- /dev/null +++ b/tests/aggregation/test_aggregation.py @@ -0,0 +1,112 @@ +""" +Simplest aggregation algorythms tests on different datasets +Testing all boundary conditions and asserts +""" +import pytest + +import pandas as pd + +from crowdkit.aggregation import MajorityVote, MMSR, Wawa, GoldMajorityVote, ZeroBasedSkill + +from .data_mv import * # noqa: F401, F403 +from .data_mmsr import * # noqa: F401, F403 +from .data_gold_mv import * # noqa: F401, F403 +from .data_wawa import * # noqa: F401, F403 +from .data_zbs import * # noqa: F401, F403 + + +def asserts_compare_df(left_df, right_df, sort_flds): + left_df = left_df.sort_values(sort_flds).reset_index(drop=True) + right_df = right_df.sort_values(sort_flds).reset_index(drop=True) + 
pd.testing.assert_frame_equal(left_df, right_df, rtol=1e-5) + + +def asserts_compare_matrix_df(left_df, right_df): + left_df = left_df[sorted(left_df.columns.values)] + right_df = right_df[sorted(right_df.columns.values)] + pd.testing.assert_frame_equal(left_df, right_df, rtol=1e-5) + + +@pytest.mark.parametrize( + 'agg_class, fit_method, predict_method, dataset, results_dataset', + [ + (MajorityVote, None, 'fit_predict', 'toy', 'mv'), + (MajorityVote, None, 'fit_predict', 'simple', 'mv'), + (MajorityVote, None, 'fit_predict_proba', 'toy', 'mv'), + (MajorityVote, None, 'fit_predict_proba', 'simple', 'mv'), + (MMSR, None, 'fit_predict', 'toy', 'mmsr'), + (MMSR, None, 'fit_predict', 'simple', 'mmsr'), + (MMSR, None, 'fit_predict_proba', 'toy', 'mmsr'), + (MMSR, None, 'fit_predict_proba', 'simple', 'mmsr'), + (Wawa, None, 'fit_predict', 'toy', 'wawa'), + (Wawa, None, 'fit_predict', 'simple', 'wawa'), + (Wawa, None, 'fit_predict_proba', 'toy', 'wawa'), + (Wawa, None, 'fit_predict_proba', 'simple', 'wawa'), + (GoldMajorityVote, 'fit', 'predict', 'toy', 'gold'), + (GoldMajorityVote, 'fit', 'predict', 'simple', 'gold'), + (GoldMajorityVote, 'fit', 'predict_proba', 'toy', 'gold'), + (GoldMajorityVote, 'fit', 'predict_proba', 'simple', 'gold'), + (ZeroBasedSkill, None, 'fit_predict', 'toy', 'zbs'), + (ZeroBasedSkill, None, 'fit_predict', 'simple', 'zbs'), + (ZeroBasedSkill, None, 'fit_predict_proba', 'toy', 'zbs'), + (ZeroBasedSkill, None, 'fit_predict_proba', 'simple', 'zbs'), + ], + ids=[ + 'Majority Vote predict labels on toy YSDA', + 'Majority Vote predict labelson simple dataset', + 'Majority Vote predict probabilities on toy YSDA', + 'Majority Vote predict probabilities on simple dataset', + 'MMSR predict labels on toy YSDA', + 'MMSR predict labelson simple dataset', + 'MMSR predict probabilities on toy YSDA', + 'MMSR predict probabilities on simple dataset', + 'Wawa predict labels on toy YSDA', + 'Wawa predict labels on simple dataset', + 'Wawa predict probabilities on toy YSDA', + 'Wawa predict probabilities on simple dataset', + 'Gold predict labels on toy YSDA', + 'Gold predict labels on simple dataset', + 'Gold predict probabilities on toy YSDA', + 'Gold predict probabilities on simple dataset', + 'ZBS predict labels on toy YSDA', + 'ZBS predict labels on simple dataset', + 'ZBS predict probabilities on toy YSDA', + 'ZBS predict probabilities on simple dataset', + ], +) +def test_fit_predict_aggregations_methods( + request, not_random, + agg_class, fit_method, predict_method, + dataset, results_dataset +): + """ + Tests all aggregation methods, that fit->predict chain works well, and at each step we have the correct values for: + - tasks_labels + - probas + - performers_skills + """ + # incoming datasets + answers = request.getfixturevalue(f'{dataset}_answers_df') + gold = request.getfixturevalue(f'{dataset}_gold_df') + + # result datasets for comparison + labels_result = request.getfixturevalue(f'{dataset}_labels_result_{results_dataset}') + skills_result = request.getfixturevalue(f'{dataset}_skills_result_{results_dataset}') + probas_result = request.getfixturevalue(f'{dataset}_probas_result_{results_dataset}') + + aggregator = agg_class() + if fit_method is not None: + ret_val = getattr(aggregator, fit_method)(answers, gold) + assert isinstance(ret_val, agg_class) + asserts_compare_df(aggregator.performers_skills, skills_result, ['performer', 'skill']) + + somethings_predict = getattr(aggregator, predict_method)(answers) + + # checking after predict + 
asserts_compare_df(aggregator.tasks_labels, labels_result, ['label', 'task']) + asserts_compare_matrix_df(aggregator.probas, probas_result) + asserts_compare_df(aggregator.performers_skills, skills_result, ['performer', 'skill']) + if 'proba' in predict_method: + assert somethings_predict is aggregator.probas + else: + assert somethings_predict is aggregator.tasks_labels diff --git a/tests/aggregation/test_aggregation_edge_cases.py b/tests/aggregation/test_aggregation_edge_cases.py new file mode 100644 index 00000000..45b42d02 --- /dev/null +++ b/tests/aggregation/test_aggregation_edge_cases.py @@ -0,0 +1,181 @@ +""" +Simplest aggregation algorythms tests on different datasets +Testing all boundary conditions and asserts +""" +import pytest +import pandas as pd + +from crowdkit.aggregation import MajorityVote, MMSR, Wawa, GoldMajorityVote, ZeroBasedSkill + +from .data_gold_mv import * # noqa: F401, F403 + + +# less field in all crowd datasets + +@pytest.fixture +def answers_no_task(): + return pd.DataFrame({'performer': ['w1'], 'label': ['no']}) + + +@pytest.fixture +def answers_no_label(): + return pd.DataFrame({'performer': ['w1'], 'task': ['t1']}) + + +@pytest.fixture +def answers_no_performer(): + return pd.DataFrame({'task': ['t1'], 'label': ['no']}) + + +@pytest.fixture +def gold_no_task(): + return pd.DataFrame({'label': ['no']}) + + +@pytest.fixture +def gold_no_label(): + return pd.DataFrame({'task': ['t1']}) + + +@pytest.mark.parametrize( + 'agg_class, predict_method, exception, answers_dataset', + [ + (MajorityVote, 'fit_predict', AssertionError, 'answers_no_task'), + (MajorityVote, 'fit_predict', AssertionError, 'answers_no_label'), + (MajorityVote, 'fit_predict', AssertionError, 'answers_no_performer'), + (MajorityVote, 'fit_predict_proba', AssertionError, 'answers_no_task'), + (MajorityVote, 'fit_predict_proba', AssertionError, 'answers_no_label'), + (MajorityVote, 'fit_predict_proba', AssertionError, 'answers_no_performer'), + (MMSR, 'fit_predict', AssertionError, 'answers_no_task'), + (MMSR, 'fit_predict', AssertionError, 'answers_no_label'), + (MMSR, 'fit_predict', AssertionError, 'answers_no_performer'), + (MMSR, 'fit_predict_proba', AssertionError, 'answers_no_task'), + (MMSR, 'fit_predict_proba', AssertionError, 'answers_no_label'), + (MMSR, 'fit_predict_proba', AssertionError, 'answers_no_performer'), + (Wawa, 'fit_predict', AssertionError, 'answers_no_task'), + (Wawa, 'fit_predict', AssertionError, 'answers_no_label'), + (Wawa, 'fit_predict', AssertionError, 'answers_no_performer'), + (Wawa, 'fit_predict_proba', AssertionError, 'answers_no_task'), + (Wawa, 'fit_predict_proba', AssertionError, 'answers_no_label'), + (Wawa, 'fit_predict_proba', AssertionError, 'answers_no_performer'), + (ZeroBasedSkill, 'fit_predict', AssertionError, 'answers_no_task'), + (ZeroBasedSkill, 'fit_predict', AssertionError, 'answers_no_label'), + (ZeroBasedSkill, 'fit_predict', AssertionError, 'answers_no_performer'), + (ZeroBasedSkill, 'fit_predict_proba', AssertionError, 'answers_no_task'), + (ZeroBasedSkill, 'fit_predict_proba', AssertionError, 'answers_no_label'), + (ZeroBasedSkill, 'fit_predict_proba', AssertionError, 'answers_no_performer'), + ], + ids=[ + 'Majority Vote predict raises on no "task"', + 'Majority Vote predict raises on no "label"', + 'Majority Vote predict raises on no "performer"', + 'Majority Vote predict_proba raises on no "task"', + 'Majority Vote predict_proba raises on no "label"', + 'Majority Vote predict_proba raises on no "performer"', + 'MMSR predict raises on 
no "task"', + 'MMSR predict raises on no "label"', + 'MMSR predict raises on no "performer"', + 'MMSR predict_proba raises on no "task"', + 'MMSR predict_proba raises on no "label"', + 'MMSR predict_proba raises on no "performer"', + 'Wawa predict raises on no "task"', + 'Wawa predict raises on no "label"', + 'Wawa predict raises on no "performer"', + 'Wawa predict_proba raises on no "task"', + 'Wawa predict_proba raises on no "label"', + 'Wawa predict_proba raises on no "performer"', + 'ZBS predict raises on no "task"', + 'ZBS predict raises on no "label"', + 'ZBS predict raises on no "performer"', + 'ZBS predict_proba raises on no "task"', + 'ZBS predict_proba raises on no "label"', + 'ZBS predict_proba raises on no "performer"', + ], +) +def test_agg_raise_on_less_columns(request, agg_class, predict_method, exception, answers_dataset): + """ + Tests all aggregation methods raises basik exceptions + """ + answers = request.getfixturevalue(answers_dataset) + aggregator = agg_class() + with pytest.raises(exception): + getattr(aggregator, predict_method)(answers) + + +@pytest.mark.parametrize( + 'exception, answers_on_gold_dataset', + [ + # test raises in fit + (AssertionError, 'answers_no_task'), + (AssertionError, 'answers_no_label'), + (AssertionError, 'answers_no_performer'), + (AssertionError, 'gold_no_task'), + (AssertionError, 'gold_no_label'), + # raises on mismatch datasets + (AssertionError, 'toy_answers_on_gold_df_cannot_fit'), + ], + ids=[ + # test raises in fit + 'no "task" in answers_on_gold', + 'no "label" in answers_on_gold', + 'no "performer" in answers_on_gold', + 'no "task" in gold_df', + 'no "label" in gold_df', + # raises on mismatch datasets + 'cannot compute performers skills', + ], +) +def test_gold_mv_raise_in_fit(request, not_random, toy_gold_df, exception, answers_on_gold_dataset): + """ + Tests Gold MajorityVote on raises basik exceptions + """ + answers_on_gold = request.getfixturevalue(answers_on_gold_dataset) + + aggregator = GoldMajorityVote() + with pytest.raises(exception): + aggregator.fit(answers_on_gold, toy_gold_df) + + +@pytest.mark.parametrize( + 'predict_method, exception, answers_on_gold_dataset, answers_dataset', + [ + # test raises in predict + ('predict', AssertionError, 'toy_answers_df', 'answers_no_task'), + ('predict', AssertionError, 'toy_answers_df', 'answers_no_label'), + ('predict', AssertionError, 'toy_answers_df', 'answers_no_performer'), + # test raises in predict_proba + ('predict_proba', AssertionError, 'toy_answers_df', 'answers_no_task'), + ('predict_proba', AssertionError, 'toy_answers_df', 'answers_no_label'), + ('predict_proba', AssertionError, 'toy_answers_df', 'answers_no_performer'), + # raises on mismatch datasets + ('predict', AssertionError, 'toy_answers_on_gold_df_cannot_predict', 'toy_answers_df'), + ('predict_proba', AssertionError, 'toy_answers_on_gold_df_cannot_predict', 'toy_answers_df'), + ], + ids=[ + # test raises in predict + 'raise in predict on no "task" in answers_on_gold', + 'raise in predict on no "label" in answers_on_gold', + 'raise in predict on no "performer" in answers_on_gold', + # test raises in predict_proba + 'raise in predict_proba on no "task" in answers_on_gold', + 'raise in predict_proba on no "label" in answers_on_gold', + 'raise in predict_proba on no "performer" in answers_on_gold', + # raises on mismatch datasets + 'raise in predict - cannot compute labels', + 'raise in predict_proba - cannot compute probas', + ], +) +def test_gold_mv_raise_in_predict( + request, not_random, toy_gold_df, + 
predict_method, exception, answers_on_gold_dataset, answers_dataset +): + """ + Tests that Gold Majority Vote raises basic validation exceptions in predict + """ + answers_on_gold = request.getfixturevalue(answers_on_gold_dataset) + answers = request.getfixturevalue(answers_dataset) + + aggregator = GoldMajorityVote() + aggregator.fit(answers_on_gold, toy_gold_df) + with pytest.raises(exception): + getattr(aggregator, predict_method)(answers) diff --git a/tests/aggregation/test_ds_aggregation.py b/tests/aggregation/test_ds_aggregation.py new file mode 100644 index 00000000..74c7ee0e --- /dev/null +++ b/tests/aggregation/test_ds_aggregation.py @@ -0,0 +1,189 @@ +""" +Dawid-Skene aggregation tests on the toy YSDA dataset +Testing boundary conditions and asserts +""" +import numpy as np +import pandas as pd +import pytest + +from pandas.testing import assert_frame_equal +from crowdkit.aggregation import DawidSkene +from crowdkit.aggregation.utils import evaluate + + +def test_aggregate_ds_on_toy_ysda(toy_answers_df, toy_ground_truth_df): + np.random.seed(42) + predict_df = DawidSkene(10).fit_predict(toy_answers_df) + accuracy = evaluate(toy_ground_truth_df, predict_df) + assert accuracy == 1.0 + + +def test_aggregate_ds_on_simple(simple_answers_df, simple_ground_truth_df): + np.random.seed(42) + predict_df = DawidSkene(10).fit_predict(simple_answers_df) + accuracy = evaluate(simple_ground_truth_df, predict_df) + assert accuracy == 1.0 + + +def _make_probas(data): + # TODO: column should not be an index! + columns = pd.Index(['task', 'no', 'yes'], name='label') + return pd.DataFrame(data, columns=columns).set_index('task') + + +def _make_tasks_labels(data): + # TODO: should task be indexed? + return pd.DataFrame(data, columns=['task', 'label']) + + +def _make_errors(data): + return pd.DataFrame( + data, + columns=['performer', 'label', 'no', 'yes'], + ).set_index(['performer', 'label']) + + +@pytest.fixture +def data(): + return pd.DataFrame( + [ + ['t1', 'w1', 'no'], + ['t1', 'w2', 'yes'], + # ['t1', 'w3', np.NaN], + ['t1', 'w4', 'yes'], + ['t1', 'w5', 'no'], + + ['t2', 'w1', 'yes'], + ['t2', 'w2', 'yes'], + ['t2', 'w3', 'yes'], + ['t2', 'w4', 'no'], + ['t2', 'w5', 'no'], + + ['t3', 'w1', 'yes'], + ['t3', 'w2', 'no'], + ['t3', 'w3', 'no'], + ['t3', 'w4', 'yes'], + ['t3', 'w5', 'no'], + + ['t4', 'w1', 'yes'], + ['t4', 'w2', 'yes'], + ['t4', 'w3', 'yes'], + ['t4', 'w4', 'yes'], + ['t4', 'w5', 'yes'], + + ['t5', 'w1', 'yes'], + ['t5', 'w2', 'no'], + ['t5', 'w3', 'no'], + ['t5', 'w4', 'no'], + ['t5', 'w5', 'no'], + ], + columns=['task', 'performer', 'label'] + ) + + +# The *_iter_0 fixtures below hold the initial estimates (n_iter=0), i.e. plain per-task label frequencies; +# the *_iter_1 fixtures hold the values expected after one EM iteration. +@pytest.fixture +def probas_iter_0(): + return _make_probas([ + ['t1', 0.5, 0.5], + ['t2', 0.4, 0.6], + ['t3', 0.6, 0.4], + ['t4', 0.0, 1.0], + ['t5', 0.8, 0.2], + ]) + + +@pytest.fixture +def priors_iter_0(): + return pd.Series({'no': 0.46, 'yes': 0.54}) + + +@pytest.fixture +def tasks_labels_iter_0(): + return _make_tasks_labels([ + ['t1', 'no'], + ['t2', 'yes'], + ['t3', 'no'], + ['t4', 'yes'], + ['t5', 'no'], + ]) + + +@pytest.fixture +def errors_iter_0(): + return _make_errors([ + ['w1', 'no', 0.22, 0.19], + ['w1', 'yes', 0.78, 0.81], + + ['w2', 'no', 0.61, 0.22], + ['w2', 'yes', 0.39, 0.78], + + ['w3', 'no', 0.78, 0.27], + ['w3', 'yes', 0.22, 0.73], + + ['w4', 'no', 0.52, 0.30], + ['w4', 'yes', 0.48, 0.70], + + ['w5', 'no', 1.00, 0.63], + ['w5', 'yes', 0.00, 0.37], + ]) + + +@pytest.fixture +def probas_iter_1(): + return _make_probas([ + ['t1', 0.35, 0.65], + ['t2', 0.26, 0.74], + ['t3', 0.87, 0.13], + ['t4', 0.00, 1.00], + ['t5', 0.95, 0.05], 
+ ]) + + +@pytest.fixture +def priors_iter_1(): + return pd.Series({'no': 0.49, 'yes': 0.51}) + + +@pytest.fixture +def tasks_labels_iter_1(): + return _make_tasks_labels([ + ['t1', 'yes'], + ['t2', 'yes'], + ['t3', 'no'], + ['t4', 'yes'], + ['t5', 'no'], + ]) + + +@pytest.fixture +def errors_iter_1(): + return _make_errors([ + ['w1', 'no', 0.14, 0.25], + ['w1', 'yes', 0.86, 0.75], + + ['w2', 'no', 0.75, 0.07], + ['w2', 'yes', 0.25, 0.93], + + ['w3', 'no', 0.87, 0.09], + ['w3', 'yes', 0.13, 0.91], + + ['w4', 'no', 0.50, 0.31], + ['w4', 'yes', 0.50, 0.69], + + ['w5', 'no', 1.00, 0.61], + ['w5', 'yes', 0.00, 0.39], + ]) + + +@pytest.mark.parametrize('n_iter', [0, 1]) +def test_dawid_skene_step_by_step(request, data, n_iter): + probas = request.getfixturevalue(f'probas_iter_{n_iter}') + tasks_labels = request.getfixturevalue(f'tasks_labels_iter_{n_iter}') + errors_iter = request.getfixturevalue(f'errors_iter_{n_iter}') + + ds = DawidSkene(n_iter).fit(data) + assert_frame_equal(probas, ds.probas, check_like=True, atol=0.005) + assert_frame_equal(tasks_labels, ds.tasks_labels, check_like=True, atol=0.005) + assert_frame_equal(errors_iter, ds.errors, check_like=True, atol=0.005) diff --git a/tests/aggregation/test_text_aggregation.py b/tests/aggregation/test_text_aggregation.py new file mode 100644 index 00000000..8691abd6 --- /dev/null +++ b/tests/aggregation/test_text_aggregation.py @@ -0,0 +1,30 @@ +import pytest + +from crowdkit.aggregation import HRRASA, RASA + +from .data_hrrasa import * # noqa: F401, F403 + + +@pytest.mark.parametrize( + 'agg_class, predict_method, dataset, results_dataset', + [ + (HRRASA, 'fit_predict', 'simple', 'hrrasa'), + (RASA, 'fit_predict', 'simple', 'rasa'), + ], + ids=[ + 'HRRASA predict outputs on simple dataset', + 'RASA predict outputs on simple dataset', + ], +) +def test_fit_predict_text_aggregations_methods( + request, not_random, + agg_class, predict_method, + dataset, results_dataset +): + answers = request.getfixturevalue(f'{dataset}_text_df') + result = request.getfixturevalue(f'{dataset}_text_result_{results_dataset}') + + aggregator = agg_class() + + somethings_predict = getattr(aggregator, predict_method)(answers) + assert somethings_predict.equals(result) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..53b92588 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,214 @@ +import pytest +import random + +import pandas as pd +import numpy as np + + +@pytest.fixture +def not_random(): + random.seed(42) + np.random.seed(42) + + +# toy YSDA dataset + +@pytest.fixture +def toy_answers_df(): + return pd.DataFrame( + [ + ['w1', 't1', 'no'], + ['w1', 't2', 'yes'], + ['w1', 't3', 'yes'], + ['w1', 't4', 'yes'], + ['w1', 't5', 'yes'], + ['w2', 't1', 'yes'], + ['w2', 't2', 'yes'], + ['w2', 't3', 'no'], + ['w2', 't4', 'yes'], + ['w2', 't5', 'no'], + ['w3', 't2', 'yes'], + ['w3', 't3', 'no'], + ['w3', 't4', 'yes'], + ['w3', 't5', 'no'], + ['w4', 't1', 'yes'], + ['w4', 't2', 'no'], + ['w4', 't3', 'yes'], + ['w4', 't4', 'yes'], + ['w4', 't5', 'no'], + ['w5', 't1', 'no'], + ['w5', 't2', 'no'], + ['w5', 't3', 'no'], + ['w5', 't4', 'yes'], + ['w5', 't5', 'no'], + ], + columns=['performer', 'task', 'label'] + ) + + +@pytest.fixture +def toy_ground_truth_df(): + return pd.DataFrame( + [ + ['t1', 'yes'], + ['t2', 'yes'], + ['t3', 'no'], + ['t4', 'yes'], + ['t5', 'no'], + ], + columns=['task', 'label'] + ) + + +@pytest.fixture +def toy_gold_df(): + return pd.DataFrame( + [ + ['t1', 'yes', 1.0], + ['t2', 'yes', 1.0], + ], + 
columns=['task', 'label', 'weight'] + ) + + +# Simple dataset that imitates real toloka answers + +@pytest.fixture +def simple_answers_df(): + return pd.DataFrame( + [ + # good performers + ['bde3b214b06c1efa6cb1bc6284dc72d2', '1231239876--5fac0d234ffb2f3b00893eec', 'goose'], + ['bde3b214b06c1efa6cb1bc6284dc72d2', '1231239876--5fac0d234ffb2f3b00893efb', 'parrot'], + ['bde3b214b06c1efa6cb1bc6284dc72d2', '1231239876--5fac0d234ffb2f3b00893f03', 'goose'], + ['bde3b214b06c1efa6cb1bc6284dc72d2', '1231239876--5fac0d234ffb2f3b00893f05', 'goose'], + ['bde3b214b06c1efa6cb1bc6284dc72d2', '1231239876--5fac0d234ffb2f3b00893f02', 'parrot'], + ['bde3b214b06c1efa6cb1bc6284dc72d2', '1231239876--5fac0d234ffb2f3b00893f08', 'parrot'], + ['b17c3301ad2ccbb798716fdd405d16e8', '1231239876--5fac0d234ffb2f3b00893efb', 'parrot'], + ['b17c3301ad2ccbb798716fdd405d16e8', '1231239876--5fac0d234ffb2f3b00893ee8', 'chicken'], + ['b17c3301ad2ccbb798716fdd405d16e8', '1231239876--5fac0d234ffb2f3b00893f07', 'parrot'], + ['b17c3301ad2ccbb798716fdd405d16e8', '1231239876--5fac0d234ffb2f3b00893efd', 'chicken'], + ['b17c3301ad2ccbb798716fdd405d16e8', '1231239876--5fac0d234ffb2f3b00893ee4', 'chicken'], + ['b17c3301ad2ccbb798716fdd405d16e8', '1231239876--5fac0d234ffb2f3b00893f03', 'goose'], + ['a452e450f913cfa987cad58d50393718', '1231239876--5fac0d234ffb2f3b00893ee8', 'chicken'], + ['a452e450f913cfa987cad58d50393718', '1231239876--5fac0d234ffb2f3b00893eec', 'goose'], + ['a452e450f913cfa987cad58d50393718', '1231239876--5fac0d234ffb2f3b00893f05', 'goose'], + ['a452e450f913cfa987cad58d50393718', '1231239876--5fac0d234ffb2f3b00893f02', 'parrot'], + ['a452e450f913cfa987cad58d50393718', '1231239876--5fac0d234ffb2f3b00893f08', 'parrot'], + ['0f65edea0a6dc7b9acba1dea313bbb3d', '1231239876--5fac0d234ffb2f3b00893eec', 'goose'], + ['0f65edea0a6dc7b9acba1dea313bbb3d', '1231239876--5fac0d234ffb2f3b00893ee8', 'chicken'], + ['0f65edea0a6dc7b9acba1dea313bbb3d', '1231239876--5fac0d234ffb2f3b00893f03', 'goose'], + ['0f65edea0a6dc7b9acba1dea313bbb3d', '1231239876--5fac0d234ffb2f3b00893ee4', 'chicken'], + # fraudster - always answers "parrot" + ['0c3eb7d5fcc414db137c4180a654c06e', '1231239876--5fac0d234ffb2f3b00893eec', 'parrot'], # 'goose' + ['0c3eb7d5fcc414db137c4180a654c06e', '1231239876--5fac0d234ffb2f3b00893efb', 'parrot'], + ['0c3eb7d5fcc414db137c4180a654c06e', '1231239876--5fac0d234ffb2f3b00893f07', 'parrot'], + ['0c3eb7d5fcc414db137c4180a654c06e', '1231239876--5fac0d234ffb2f3b00893efd', 'parrot'], # 'chicken' + ['0c3eb7d5fcc414db137c4180a654c06e', '1231239876--5fac0d234ffb2f3b00893ee4', 'parrot'], # 'chicken' + ['0c3eb7d5fcc414db137c4180a654c06e', '1231239876--5fac0d234ffb2f3b00893f05', 'parrot'], # 'goose' + # careless + ['e563e2fb32fce9f00123a65a1bc78c55', '1231239876--5fac0d234ffb2f3b00893efb', 'parrot'], + ['e563e2fb32fce9f00123a65a1bc78c55', '1231239876--5fac0d234ffb2f3b00893ee8', 'goose'], # 'chicken' + ['e563e2fb32fce9f00123a65a1bc78c55', '1231239876--5fac0d234ffb2f3b00893f02', 'parrot'], + ['e563e2fb32fce9f00123a65a1bc78c55', '1231239876--5fac0d234ffb2f3b00893f08', 'parrot'], + ['e563e2fb32fce9f00123a65a1bc78c55', '1231239876--5fac0d234ffb2f3b00893f07', 'parrot'], + ['e563e2fb32fce9f00123a65a1bc78c55', '1231239876--5fac0d234ffb2f3b00893efd', 'goose'], # 'chicken' + ], + columns=['performer', 'task', 'label'] + ) + + +@pytest.fixture +def simple_ground_truth_df(): + return pd.DataFrame( + [ + ['1231239876--5fac0d234ffb2f3b00893eec', 'goose'], + ['1231239876--5fac0d234ffb2f3b00893f03', 'goose'], + ['1231239876--5fac0d234ffb2f3b00893f05', 
'goose'], + ['1231239876--5fac0d234ffb2f3b00893efb', 'parrot'], + ['1231239876--5fac0d234ffb2f3b00893f02', 'parrot'], + ['1231239876--5fac0d234ffb2f3b00893f08', 'parrot'], + ['1231239876--5fac0d234ffb2f3b00893f07', 'parrot'], + ['1231239876--5fac0d234ffb2f3b00893ee8', 'chicken'], + ['1231239876--5fac0d234ffb2f3b00893efd', 'chicken'], + ['1231239876--5fac0d234ffb2f3b00893ee4', 'chicken'], + ], + columns=['task', 'label'] + ) + + +@pytest.fixture +def simple_gold_df(): + return pd.DataFrame( + [ + ['1231239876--5fac0d234ffb2f3b00893eec', 'goose'], + ['1231239876--5fac0d234ffb2f3b00893efb', 'parrot'], + ['1231239876--5fac0d234ffb2f3b00893ee8', 'chicken'], + ], + columns=['task', 'label'] + ) + + +@pytest.fixture +def simple_text_df(): + return pd.DataFrame( + [ + ['1255-74899-0020', 'as soon as you downed my worst in stockings sweetheart', 'b6214dff3665ba9c6bc96dc326a417c9', 'i\'d sooner see you darning my worsted stockings sweetheart', np.array([1.8628045, -0.66789037]), np.array([0.8619265, 0.3983395])], # noqa + ['1651-136854-0030', 'it must indeed be allowed that is structure and sentences is expanded and often has somewhat of the inversion of latin and that he delighted to express familiar thought in a philosophical language meaning this the works of socrates who it was said reduced philosophy to the simplicity of life ', 'c740c713b07635302cf145d16ae2d698', np.nan, np.array([1.5327121, 2.5106835]), np.nan], # noqa + ['7601-175351-0021', 'there is more than one amongst us who would like to and imitate them i think', 'c854a0b6d71ec3503e0ce4ea2179d8c7', 'there\'s more than one amongst us who would like to imitate them i think', np.array([10.217721, 16.948954]), np.array([10.686009, 17.633106])], # noqa + ['1255-74899-0020', 'i said i was just talking sweetheart', '19fdfe8fe1f7dd366f594bf2ce0bdd3e', 'i\'d sooner see you darning my worsted stockings sweetheart', np.array([1.9092205, -0.5734551]), np.array([0.8508962, 0.38230664])], # noqa + ['1651-136854-0030', 'it must indeed be allowed that the structure of your sentences is expanded and often has somewhat of the inversion of latin and that he delighted to express familiar thoughts in a philosophical language being in the the reversal of socrates who it was said reduced philosophy to the simplicity of common life', 'c854a0b6d71ec3503e0ce4ea2179d8c7', np.nan, np.array([1.523252, 2.5053673]), np.nan], # noqa + ['1651-136854-0030', 'it must indeed be allowed that the structure iss expanded and often has somewhat of the invention of latin in that he delighted to express familiar thoughts in philosophical language being in this the reverse of socrates whom it was said reduced phylosophy to the simplicity of common life', '2ef99a0a7639b5fcd7e66e59e7b7e3bf', np.nan, np.array([1.5202638, 2.481906]), np.nan], # noqa + ['7601-175351-0021', 'there is more than one among us who like to eliminate them i think', 'ab2784b4377e0848ebff96098fb67301', 'there\'s more than one amongst us who would like to imitate them i think', np.array([10.222795, 16.946047]), np.array([10.673217, 17.622795])], # noqa + ['7601-175351-0021', 'there\'s more than one amongst us who would like doing imitate them i think', '07bda6ebab4a387f8ced48c40c5878a6', 'there\'s more than one amongst us who would like to imitate them i think', np.array([10.197447, 16.96481]), np.array([10.673589, 17.622868])], # noqa + ['1651-136854-0030', 'it must indeed be allowed that the structurally sentences is expanded and often has somewhat be inversion of latin and that he delighted to express 
familiar thought in a philosophical language being in this the reverse of socrates who it was said reduce philosophy to the simplicity of common life', 'efcfbfa835fdcec9a865f82bbf6d36df', np.nan, np.array([1.5325097, 2.5046723]), np.nan], # noqa + ['7601-175351-0021', 'there\'s more than one amongst us who would like to imitate them i think', 'efcfbfa835fdcec9a865f82bbf6d36df', 'there\'s more than one amongst us who would like to imitate them i think', np.array([10.219084, 16.948515]), np.array([10.673225, 17.622633])], # noqa + ['7601-175351-0021', 'there\'s more than one amongst us who would like imitate them i think', '27dfec580d349e20166b56be480336ea', 'there\'s more than one amongst us who would like to imitate them i think', np.array([10.217423, 16.949697]), np.array([10.673151, 17.622568])], # noqa + ['1255-74899-0020', 'as soon as you darned my worst is talking sweetheart', '4330bead5a86328a3d6987b3c065dc80', 'i\'d sooner see you darning my worsted stockings sweetheart', np.array([1.9123898, -0.58190906]), np.array([0.8685327, 0.39246213])], # noqa + ['1255-74899-0020', 'as soon as you\'ve done my worthiest stocking sweetheart', '2c3954249d00e0aa0adab7226eba47e4', 'i\'d sooner see you darning my worsted stockings sweetheart', np.array([1.8627588, -0.661437]), np.array([0.8250761, 0.39154962])], # noqa + ['1255-74899-0020', 'i should have seen you donned my worst heel stocking sweetheart', 'cd5520854f89b31ae5f8673fa2992ac7', 'i\'d sooner see you darning my worsted stockings sweetheart', np.array([1.8909845, -0.63419354]), np.array([0.89531624, 0.3790261])], # noqa + ['1651-136854-0030', 'it must indeed be allowed that the structure of his sentences is expanded and often has somewhat of the inversion of latin and that he delighted to express familiar thoughts in a philosophical language being in this the reverse of socrates who it was said reduced philosophy to the simplicity of common life', '5701a8373b728dd333c0796df9f2a8f4', np.nan, np.array([1.5207397, 2.5072067]), np.nan], # noqa + ['7601-175351-0021', 'there\'s more than one amongst us who would like to imitate anything', '9e9847e525a1d0fdfaf22d83fa75d115', 'there\'s more than one amongst us who would like to imitate them i think', np.array([10.219814, 16.947468]), np.array([10.675565, 17.62496])], # noqa + ['1651-136854-0030', 'it must indeed be allowed that the structure of his sentences is expanded and often has somewhat of the inversion of latin and that he delighted to express familiar thoughts in philosophical language being in this the reverse of socrates who it was said reduced philosophy to the simplicity of common life ', 'cd5520854f89b31ae5f8673fa2992ac7', np.nan, np.array([1.51274, 2.5141635]), np.nan], # noqa + ['1651-136854-0030', 'it must indeed be allowed that the structure of his sentence is expanded and often has somewhat of the inversion of latin and that he delighted to express familiar thoughts and philosophical language being in this the reverse of socrates who it was said reduced philosophy to the simplicity of common life', '2047f49d9762c9db85f2240b06dd2d12', np.nan, np.array([1.5277034, 2.5007238]), np.nan], # noqa + ['1651-136854-0030', 'it must in need be allowed the structure id expenses it expend it and obten some water the inversion of letter and he the new year fox and the language vniversal soxes it was said phlosopharty of common life', '0598870b6ec9d30e31958f5b517b4336', np.nan, np.array([1.5254755, 2.4552798]), np.nan], # noqa + ['1651-136854-0030', 'it must indeed be a loud that the 
structurer he senses is expanded and often has some what of the inversion of latin and that he delighted to express the new year fault in a philosophical language being in this the reverse of sock verties who it was said reduced philosophy is the simplicity to common life', 'f733ff7874adaf5e83b5243e7aebc6ef', np.nan, np.array([1.5204769, 2.5086155]), np.nan], # noqa + ['7601-175351-0021', 'there is more than one amongst us who would like to an imitate they might think', 'f733ff7874adaf5e83b5243e7aebc6ef', 'there\'s more than one amongst us who would like to imitate them i think', np.array([10.218769, 16.948397]), np.array([10.676133, 17.628136])], # noqa + ['1255-74899-0020', 'i shouuld had seen you damp my word', 'bd46bdedc1a395765f7163152ae1d6e9', 'i\'d sooner see you darning my worsted stockings sweetheart', np.array([4.2875843, 0.24995197]), np.array([0.8510484, 0.39480436])], # noqa + ['1255-74899-0020', 'as soon as you donned my worst stocking sweetheart', '1d94612b35ca8f3b1567e170808afb9d', 'i\'d sooner see you darning my worsted stockings sweetheart', np.array([1.8590927, -0.6760239]), np.array([0.8439374, 0.39588147])], # noqa + ['7601-175351-0021', 'there\'s more than one amongst us who would like to imitate them i think', '1ea4a74105f43c96ab394e7f6495ef27', 'there\'s more than one amongst us who would like to imitate them i think', np.array([10.219354, 16.948345]), np.array([10.679564, 17.62985])], # noqa + ['1255-74899-0020', 'as soon as he dawned my worst is talking to sweetheart', 'fca852cd8dec559a31e57d7957a4de13', 'i\'d sooner see you darning my worsted stockings sweetheart', np.array([1.88419, -0.58892304]), np.array([0.8540088, 0.3765296])], # noqa + ['7601-175351-0021', 'there\'s more than one amongst us who would like to imitate my think', '194fd65c5c1246bed88b141320db8bcd', 'there\'s more than one amongst us who would like to imitate them i think', np.array([10.218983, 16.948296]), np.array([10.654044, 17.605904])], # noqa + ['7601-175351-0021', 'there is more than one amongst us who would like to immitate them i think', '1f30d866dc87d8aba36da6c94d578097', 'there\'s more than one amongst us who would like to imitate them i think', np.array([10.226313, 16.944336]), np.array([10.672564, 17.62215])], # noqa + ['7601-175351-0021', 'there\'s more than one amongst us who would like to wanna imitate them i think', '5701a8373b728dd333c0796df9f2a8f4', 'there\'s more than one amongst us who would like to imitate them i think', np.array([10.2202015, 16.948513]), np.array([10.692975, 17.64521])], # noqa + ['7601-175351-0021', 'there\'s more than one amongst us who would like to imitate them i think', 'fca852cd8dec559a31e57d7957a4de13', 'there\'s more than one amongst us who would like to imitate them i think', np.array([10.219054, 16.948479]), np.array([10.673226, 17.622896])], # noqa + ['7601-175351-0021', 'they is more than one amongs us who would like to immitate them i think', 'f84d186e73c19bba35b59c631e56d860', 'there\'s more than one amongst us who would like to imitate them i think', np.array([10.233541, 16.935406]), np.array([10.667211, 17.619024])], # noqa + ['7601-175351-0021', 'there is more than one amongst us who would like to imitate them i think', '86cdbbd441956f774c1f09ca4e47dfeb', 'there\'s more than one amongst us who would like to imitate them i think', np.array([10.219196, 16.948393]), np.array([10.674336, 17.623909])], # noqa + ['1255-74899-0020', 'as soon as you dawned my worst in stalking the sweetheart', 'c854a0b6d71ec3503e0ce4ea2179d8c7', 'i\'d sooner see 
you darning my worsted stockings sweetheart', np.array([1.8683596, -0.60298634]), np.array([0.86584425, 0.43221])], # noqa + ['1255-74899-0020', 'as soon as you dawned my worst in stocking sweetheart', 'f84d186e73c19bba35b59c631e56d860', 'i\'d sooner see you darning my worsted stockings sweetheart', np.array([1.8571255, -0.6310346]), np.array([0.8503618, 0.37850586])], # noqa + ['7601-175351-0021', 'there is more than one amongst us who would like to imitate him i think', '2a2b550bdf723c06898ce62185f485a7', 'there\'s more than one amongst us who would like to imitate them i think', np.array([10.227572, 16.945225]), np.array([10.672767, 17.622879])], # noqa + ['7601-175351-0021', 'there\'s more than one amongst us who would like to immitate them i think', '555686599071ea2f3012cd64a381cf60', 'there\'s more than one amongst us who would like to imitate them i think', np.array([10.223191, 16.945898]), np.array([10.673242, 17.623188])], # noqa + ['1255-74899-0020', 'i should have seen you turned my worsted stocking sweetheart', 'd91be229ea4909b060e99609ea5b4f66', 'i\'d sooner see you darning my worsted stockings sweetheart', np.array([1.9095235, -0.62991136]), np.array([0.7952585, 0.45487818])], # noqa + ['1255-74899-0020', 'i should have seen tart my worsest tlking sweetheart', '4410d66e6c13650a0478455ad015f118', 'i\'d sooner see you darning my worsted stockings sweetheart', np.array([1.906517, -0.6025044]), np.array([0.84691495, 0.39889106])], # noqa + ['7601-175351-0021', 'there is more than one amongst us who would like to imitate them i think', 'd91be229ea4909b060e99609ea5b4f66', 'there\'s more than one amongst us who would like to imitate them i think', np.array([10.212206, 16.952347]), np.array([10.676438, 17.627277])], # noqa + ['7601-175351-0021', 'there is more than one amongst us who would like to an imitate they might think', '74b8938eb4736c40da2852ce2c5e5008', 'there\'s more than one amongst us who would like to imitate them i think', np.array([10.219014, 16.94834]), np.array([10.675005, 17.626295])], # noqa + ['1255-74899-0020', 'i should have dulled by what is locking some dark', 'c5cf3042be0413d5a2a8360ba344e258', 'i\'d sooner see you darning my worsted stockings sweetheart', np.array([6.163941, -0.6219455]), np.array([0.84807605, 0.4049313])], # noqa + ['8254-84205-0005', 'it\'s all nonsense nerd crowd chris for them to think that they\'re staying on account of us who low griggs were you listening', '2047f49d9762c9db85f2240b06dd2d12', np.nan, np.array([4.953112, -0.41251197]), np.nan], # noqa + ['7601-175351-0021', 'there is more than one amongst us who would like to do an imitate i think', '0af574140e706efce6b224417b1d8aac', 'there\'s more than one amongst us who would like to imitate them i think', np.array([10.201713, 16.964706]), np.array([10.67186, 17.621662])], # noqa + ['1255-74899-0020', 'as soon as he donned my worsted stockings sweetheart', '0af574140e706efce6b224417b1d8aac', 'i\'d sooner see you darning my worsted stockings sweetheart', np.array([1.8677676, -0.6548876]), np.array([0.813319, 0.32563943])], # noqa + ['8254-84205-0005', 'it\'s all nonsense ned cried chris for them to think that they\'re staying on account of us hello griggs were you listening', 'fca852cd8dec559a31e57d7957a4de13', np.nan, np.array([4.95962, -0.4224869]), np.nan], # noqa + ['8254-84205-0005', 'its all nonsense ned cried chris for them to think that theyre staying on account of us hello griggs were you listening', '0af574140e706efce6b224417b1d8aac', np.nan, np.array([4.946168, 
-0.4152995]), np.nan], # noqa + ['8254-84205-0005', 'it\'s all nonsense ned cried chris for them to think that they\'re staying on account of us hello greg were you listening ', '525be6f4af2fb2c4781575b7c9fbaee0', np.nan, np.array([4.9694304, -0.42101935]), np.nan], # noqa + ['8254-84205-0005', 'its all nonsense ned cried chris for them to think that they are staying on account of us hello grigs are you listening', 'd91be229ea4909b060e99609ea5b4f66', np.nan, np.array([4.968234, -0.42592508]), np.nan], # noqa + ['8254-84205-0005', 'it\'s all nonsense ned cried chris for them to think that they\'re staying on account of us hello kriegs were you listening', '043bf8732e747ca7c0a7edd6ae13182f', np.nan, np.array([4.967573, -0.43132243]), np.nan], # noqa + ['8254-84205-0005', 'its all nonsense ned cried chris for them to think that they\'re staying on account of us hello chris are you listening', '8656955f53e6d3cb9e56171be33ef2bc', np.nan, np.array([4.964371, -0.42836314]), np.nan], # noqa + ['7601-101619-0003', 'nature discovers this confusion to us painters hold that the same motions and grimaces of the face that serve for weeping serve for laughter too and indeed before the one or the other be finished do but observe the painter\'s manner of handling and you will be in doubt to which of the two the design tends and the extreme of laughter does at last bring tears', '043bf8732e747ca7c0a7edd6ae13182f', np.nan, np.array([0.791476, 2.0119832]), np.nan], # noqa + ['7601-101619-0003', 'nature discovers this confusion to us painters hold that the same motions and grimaces of the face that serve for weeping serve for laughter to and indeed before the one or the other be finished do but observe the painters manner of handling and you will be in doubt to which of the two the design tends and the extreme of laughter does it last bring tears', '0af574140e706efce6b224417b1d8aac', np.nan, np.array([0.81492954, 1.9998435]), np.nan], # noqa + ['7601-101619-0003', 'nature discovers is confusion to us painters hold it the same motions and grimaces of the face that serve for whipping serve for laughter too and indeed before the one or the other be finished do but observe the painters manner of handling and you will be in doubt to of the two of designed tens and the extreme of laughter doesn\'t last bring tears', 'e973444b19802698a8c7c602be1add89', np.nan, np.array([0.78092235, 2.0215895]), np.nan], # noqa + ['7601-101619-0003', 'nature discovers are confusing to us pain is holding the same emotions and grimaces of the face that serve for weeping serve for laughter too and indeed before the one and the other be finished do but observe the painter\'s manner of handling and you will be in doubt to which of the two the design tems and the extreme laghter does it last to bring tears', 'c9825afd938fdc509c48f0135af78e4a', np.nan, np.array([0.7876146, 2.0195444]), np.nan], # noqa + ['7601-101619-0003', 'emotions and promises of the case that serve for weeping serve for laughter', '07bda6ebab4a387f8ced48c40c5878a6', np.nan, np.array([0.79881185, 1.9938986]), np.nan], # noqa + ['7601-101619-0003', 'natures discovers as confusion to us painters hold that the same motions and grimaces of the face that serve for weeping serve for laughter to and indeed before the one or the other be finished do but observe the painters manner of handling and you will be in doubt to which of the two the design tends and the extreme of laughter does it last bring tears', '27dfec580d349e20166b56be480336ea', np.nan, np.array([0.79025686, 
2.0123045]), np.nan], # noqa + ['7601-101619-0003', 'nature discovers his confusion to us painters hold the same emotions and grimaces of the face that serve for gripping serve for laughter too and indeed before the one or the other be finished do but observe the painter\'s manner of handling and you will be in doubt to which of the two the design tends and the extreme of laughter doesn\'t last to bring tears', 'caf701c07a3374bdc98ae6bf230d4d56', np.nan, np.array([0.77356094, 2.0207922]), np.nan], # noqa + ], + columns=['task', 'output', 'performer', 'golden', 'embedding', 'golden_embedding'] + ) diff --git a/tests/metrics/test_metrics.py b/tests/metrics/test_metrics.py new file mode 100644 index 00000000..0b1e1e2b --- /dev/null +++ b/tests/metrics/test_metrics.py @@ -0,0 +1,21 @@ +import pandas as pd + +from crowdkit.metrics.data import consistency +from crowdkit.metrics.performers import golden_set_accuracy, accuracy_on_aggregates + + +def test_consistency(request): + assert consistency(request.getfixturevalue('toy_answers_df')) == 0.9384615384615385 + + +def test_golden_set_accuracy(request): + answers = request.getfixturevalue('toy_answers_df') + golden = request.getfixturevalue('toy_gold_df').set_index('task')['label'] + assert golden_set_accuracy(answers, golden) == 5 / 9 + assert golden_set_accuracy(answers, golden, by_performer=True).equals(pd.Series([0.5, 1.0, 1.0, 0.5, 0.0], index=['w1', 'w2', 'w3', 'w4', 'w5'], name='performer')) + + +def test_accuracy_on_aggregates(request): + answers = request.getfixturevalue('toy_answers_df') + assert accuracy_on_aggregates(answers) == 0.7083333333333334 + assert accuracy_on_aggregates(answers, by_performer=True).equals(pd.Series([0.6, 0.8, 1.0, 0.4, 0.8], index=['w1', 'w2', 'w3', 'w4', 'w5'], name='performer')) diff --git a/tools/gen_stubs.py b/tools/gen_stubs.py new file mode 100644 index 00000000..ad73184e --- /dev/null +++ b/tools/gen_stubs.py @@ -0,0 +1,29 @@ +from argparse import ArgumentParser + +from stubgen.ast_builder import ASTBuilder +from stubgen.import_ import override_module_import_path, traverse_modules + + +def main(): + parser = ArgumentParser() + parser.add_argument('--src-root', type=str, required=True, help='Path to source files to process') + parser.add_argument('--module-root', type=str, required=False, help='Module name to import these sources as') + args = parser.parse_args() + + override_module_import_path(args.module_root, args.src_root) + + for module_name, module in traverse_modules(args.module_root, args.src_root): + + if module_name not in ( + 'crowdlib.aggregation.dawid_skene', + 'crowdlib.aggregation.base_aggregator', + ): + continue + + stub_path = module.__file__ + 'i' + with open(stub_path, 'w') as stub_flo: + stub_flo.write(str(ASTBuilder(module_name, module))) + + +if __name__ == '__main__': + main() diff --git a/tools/stubgen/__init__.py b/tools/stubgen/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tools/stubgen/ast_builder.py b/tools/stubgen/ast_builder.py new file mode 100644 index 00000000..1fb8cab7 --- /dev/null +++ b/tools/stubgen/ast_builder.py @@ -0,0 +1,111 @@ +import inspect +from typing import Optional + +from .common import BaseDefinition, BaseLiteral, BaseASTBuilder, Node +from .definitions import AttributeAnnotationDef, AttributeDef, ClassDef, DocumentationDef, FunctionDef, ModuleDef, StaticMethodDef, ClassMethodDef +from .literals import AnnotationLiteral, ReferenceLiteral, TypeHintLiteral, ValueLiteral + + +class ASTBuilder(BaseASTBuilder): + + def __init__(self, 
module_name, module): + self.module_name = module_name + self.module_rep = self.get_module_definition(Node('', '', module)) + + def __str__(self): + return str(self.module_rep) + + # Helper methods + + def get_docstring(self, node: Node) -> Optional[BaseDefinition]: + if getattr(node.obj, '__doc__') is not None: + return self.get_documentation_definition(node.get_member('__doc__')) + return None + + # Get representation for definitions + + def get_definition(self, node: Node): + """Resolve a node to its definition""" + + if inspect.isclass(node.obj): + return self.get_class_definition(node) + + if inspect.isfunction(node.obj): + return self.get_function_definition(node) + + return self.get_attribute_definition(node) + + def get_attribute_definition(self, node: Node) -> BaseDefinition: + """Get a definition representing `name = literal`""" + return AttributeDef(node, self) + + def get_attribute_annotation_definition(self, node: Node) -> BaseDefinition: + """Get a definition representing `name: literal`""" + return AttributeAnnotationDef(node, self) + + def get_documentation_definition(self, node: Node) -> BaseDefinition: + """Get a definition representing docstring""" + return DocumentationDef(node, self) + + def get_class_definition(self, node: Node) -> BaseDefinition: + + if node.obj.__module__ == self.module_name: + return ClassDef(node, self) + + return self.get_attribute_definition(node) + + def get_function_definition(self, node: Node) -> BaseDefinition: + """Get a definition representing a function or a method""" + return FunctionDef(node, self) + + def get_class_method_definition(self, node: Node) -> BaseDefinition: + return ClassMethodDef(node, self) + + def get_static_method_definition(self, node: Node) -> BaseDefinition: + return StaticMethodDef(node, self) + + def get_module_definition(self, node: Node) -> BaseDefinition: + """Get a definition representing a module""" + return ModuleDef(node, self) + + # Get representations for values + + def get_literal(self, obj): + """Resolves an object to a literal""" + + if inspect.isclass(obj) or inspect.ismodule(obj): + return ReferenceLiteral(obj, self) + + if str(obj).startswith('typing.'): + return TypeHintLiteral(obj, self) + + return ValueLiteral(obj, self) + + def get_literal_for_annotation(self, obj) -> AnnotationLiteral: + """Get a literal for annotations""" + return AnnotationLiteral(obj, self) + + def get_literal_for_reference(self, obj) -> BaseLiteral: + """Get a literal in form of `x.y.z`""" + return ReferenceLiteral(obj, self) + + def get_literal_for_type_hint(self, obj) -> BaseLiteral: + """Get a literal for typing.* hints""" + return TypeHintLiteral(obj, self) + + def get_literal_for_value(self, obj: Node) -> BaseLiteral: + """Get a literal for plain values such as None, strings, etc.""" + return ValueLiteral(obj, self) diff --git a/tools/stubgen/common.py b/tools/stubgen/common.py new file mode 100644 index 00000000..c1033544 --- /dev/null +++ b/tools/stubgen/common.py @@ -0,0 +1,133 @@ +import textwrap +from typing import Optional, Generator, get_type_hints + + +class Node: + """Spec for a represented object""" + + def __init__(self, namespace: str, name: str, obj): + self.namespace = namespace + self.name 
= name + self.obj = obj + + @property + def indentation_level(self) -> int: + if not self.namespace: + return 0 + return self.namespace.count('.') + 1 + + def get_member(self, member_name: str) -> 'Node': + return Node( + namespace=f'{self.namespace}.{self.name}' if self.namespace else self.name if self.name else '', + name=member_name, + obj=getattr(self.obj, member_name) + ) + + def get_annotation(self, member_name: str) -> 'Node': + return Node( + namespace=f'{self.namespace}.{self.name}' if self.namespace else self.name if self.name else '', + name=member_name, + obj=get_type_hints(self.obj)[member_name], + ) + + +class BaseRepresentation: + + def __str__(self): + raise NotImplementedError + + def __iter__(self): + raise NotImplementedError + + def traverse(self) -> Generator['BaseRepresentation', None, None]: + """Recursively traverses the definition tree""" + yield self + for child in self: + yield from child.traverse() + + +class BaseLiteral(BaseRepresentation): + + def __init__(self, obj, ast: 'ASTBuilder'): + self.obj = obj + self.ast = ast + + def __str__(self): + raise NotImplementedError + + def __iter__(self): + raise NotImplementedError + + +class BaseDefinition(BaseRepresentation): + + INDENT = ' ' * 4 + + def __init__(self, node: Node, ast: 'ASTBuilder'): + self.node = node + self.ast = ast + + def __str__(self): + raise NotImplementedError + + def __iter__(self): + raise NotImplementedError + + @property + def obj(self): + return self.node.obj + + @property + def name(self): + return self.node.name + + def indent(self, string: str, level: int = 1) -> str: + return textwrap.indent(string, self.INDENT * level) + + def get_member_rep(self, member_name: str): + return self.ast.get_definition(self.node.get_member(member_name)) + + def get_annotation_rep(self, member_name: str): + return self.ast.get_attribute_annotation_definition(self.node.get_annotation(member_name)) + + +class BaseASTBuilder: + + # Helper methods + + def get_docstring(self, node: Node) -> Optional[BaseDefinition]: + raise NotImplementedError + + # Get representation for definitions + + def get_definition(self, node: Node) -> BaseDefinition: + raise NotImplementedError + + def get_attribute_definition(self, node: Node) -> BaseDefinition: + raise NotImplementedError + + def get_documentation_definition(self, node: Node) -> BaseDefinition: + raise NotImplementedError + + def get_class_definition(self, node: Node) -> BaseDefinition: + raise NotImplementedError + + def get_function_definition(self, node: Node) -> BaseDefinition: + raise NotImplementedError + + def get_module_definition(self, node: Node) -> BaseDefinition: + raise NotImplementedError + + # Get representations for values + + def get_literal(self, obj): + raise NotImplementedError + + def get_literal_for_reference(self, obj) -> BaseLiteral: + raise NotImplementedError + + def get_literal_for_type_hint(self, obj) -> BaseLiteral: + raise NotImplementedError + + def get_literal_for_value(self, obj: Node) -> BaseLiteral: + raise NotImplementedError diff --git a/tools/stubgen/definitions/__init__.py b/tools/stubgen/definitions/__init__.py new file mode 100644 index 00000000..5614888d --- /dev/null +++ b/tools/stubgen/definitions/__init__.py @@ -0,0 +1,19 @@ +"""Module contains classes representing definitions""" + +__all__ = [ + 'AttributeAnnotationDef', + 'AttributeDef', + 'ClassDef', + 'ClassMethodDef', + 'DocumentationDef', + 'FunctionDef', + 'ModuleDef', + 'StaticMethodDef', +] + +from .attribute_annotation_def import AttributeAnnotationDef +from 
.attribute_def import AttributeDef +from .class_def import ClassDef +from .documentation_def import DocumentationDef +from .function_def import ClassMethodDef, FunctionDef, StaticMethodDef +from .module_def import ModuleDef diff --git a/tools/stubgen/definitions/attribute_annotation_def.py b/tools/stubgen/definitions/attribute_annotation_def.py new file mode 100644 index 00000000..d3229f71 --- /dev/null +++ b/tools/stubgen/definitions/attribute_annotation_def.py @@ -0,0 +1,15 @@ +from ..common import Node, BaseDefinition, BaseASTBuilder + + +class AttributeAnnotationDef(BaseDefinition): + """Represents `name: annotation`""" + + def __init__(self, node: Node, ast: BaseASTBuilder): + super().__init__(node, ast) + self.annotation = self.ast.get_literal(node.obj) + + def __str__(self): + return f'{self.name}: {self.annotation}' + + def __iter__(self): + yield self.annotation diff --git a/tools/stubgen/definitions/attribute_def.py b/tools/stubgen/definitions/attribute_def.py new file mode 100644 index 00000000..f7ca5960 --- /dev/null +++ b/tools/stubgen/definitions/attribute_def.py @@ -0,0 +1,15 @@ +from ..common import Node, BaseDefinition, BaseASTBuilder + + +class AttributeDef(BaseDefinition): + """Represents `name = value`""" + + def __init__(self, node: Node, ast: BaseASTBuilder): + super().__init__(node, ast) + self.value = self.ast.get_literal(node.obj) + + def __str__(self): + return f'{self.name} = {self.value}' + + def __iter__(self): + yield self.value diff --git a/tools/stubgen/definitions/class_def.py b/tools/stubgen/definitions/class_def.py new file mode 100644 index 00000000..e0f08994 --- /dev/null +++ b/tools/stubgen/definitions/class_def.py @@ -0,0 +1,96 @@ +import inspect +from io import StringIO +from typing import get_type_hints + +from ..common import Node, BaseDefinition, BaseASTBuilder + + +class ClassDef(BaseDefinition): + + # TODO: support properties + + def __init__(self, node: Node, ast: BaseASTBuilder): + super().__init__(node, ast) + + self.docstring = self.ast.get_docstring(self.node) + + self.bases = [] + if self.obj.__bases__ != (object,): + for base in self.node.obj.__bases__: + self.bases.append(self.ast.get_literal(base)) + + self.members = {} + for member_name in self.get_public_member_names(): + # Accessing members through __dict__ is important in order to be able + # to distinguish between methods, classmethods and staticmethods + member = self.obj.__dict__[member_name] + + # TODO: dirty hack + node = self.node.get_member(member_name) + node.obj = member + + if isinstance(member, staticmethod): + node.obj = member.__func__ + definition = self.ast.get_static_method_definition(node) + elif isinstance(member, classmethod): + node.obj = member.__func__ + definition = self.ast.get_class_method_definition(node) + elif inspect.isfunction(member): + definition = self.ast.get_function_definition(node) + elif inspect.isclass(member) and member.__module__ == self.ast.module_name: + definition = self.ast.get_class_definition(node) + else: + definition = self.ast.get_attribute_definition(node) + + self.members[member_name] = definition + + self.annotations = {} + for member_name, annotation in get_type_hints(self.obj).items(): + self.annotations[member_name] = self.get_annotation_rep(member_name) + + def __str__(self): + sio = StringIO() + + if self.node.obj.__bases__ == (object,): + sio.write(f'class {self.name}:\n') + else: + bases = ', '.join(base.__name__ for base in self.node.obj.__bases__) + sio.write(f'class {self.name}({bases}):\n') + + if self.docstring: + sio.write(self.indent(f'{self.docstring}\n')) + + if self.annotations: + for name, annotation in self.annotations.items(): + sio.write(self.indent(f'{annotation}\n')) + sio.write('\n') + + if self.members: + for name, rep in self.members.items(): + sio.write(self.indent(f'{rep}\n\n')) + else: + sio.write(self.indent('pass')) + + return sio.getvalue() + + def __iter__(self): + if self.docstring: + yield self.docstring + + yield from self.bases + yield from self.members.values() + yield from self.annotations.values() + + def get_public_member_names(self): + cls = self.obj + super_cls = super(cls, cls) + + for name in dir(cls): + + # Skipping all dunder members except for __init__ + if name.startswith('__') and name != '__init__': + continue + + # Only considering members that were actually (re)defined in cls + if getattr(cls, name) is not getattr(super_cls, name, None): + yield name
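# For intuition, ClassDef renders a class into a stub roughly like the sketch
# below (illustrative only; `Greeter` is an invented example, not output
# produced by this patch):
#
#     class Greeter(Base):
#         """Says hello."""
#         name: str
#
#         def __init__(self, name: str): ...
#
#         def greet(self) -> str: ...
#
# i.e. the docstring first, then annotated attributes, then one definition per
# public member, with function bodies collapsed to `...` and `pass` emitted for
# classes that end up with no members at all.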
diff --git a/tools/stubgen/definitions/documentation_def.py b/tools/stubgen/definitions/documentation_def.py new file mode 100644 index 00000000..f801c20b --- /dev/null +++ b/tools/stubgen/definitions/documentation_def.py @@ -0,0 +1,12 @@ +import inspect + +from ..common import BaseDefinition + + +class DocumentationDef(BaseDefinition): + + def __str__(self) -> str: + return f'"""{inspect.cleandoc(self.obj)}"""\n' + + def __iter__(self): + yield from () diff --git a/tools/stubgen/definitions/function_def.py b/tools/stubgen/definitions/function_def.py new file mode 100644 index 00000000..affacbbe --- /dev/null +++ b/tools/stubgen/definitions/function_def.py @@ -0,0 +1,62 @@ +import inspect +from io import StringIO + +from ..common import BaseDefinition, Node, BaseASTBuilder + + +class FunctionDef(BaseDefinition): + + # TODO: support staticmethods and classmethods + + def __init__(self, node: Node, ast: BaseASTBuilder): + super().__init__(node, ast) + + signature = inspect.signature(self.obj) + + params = [] + for param in signature.parameters.values(): + if param.annotation is not inspect.Parameter.empty: + param = param.replace(annotation=ast.get_literal(param.annotation)) + params.append(param) + + return_annotation = signature.return_annotation + if return_annotation is not inspect.Parameter.empty: + return_annotation = ast.get_literal(return_annotation) + + self.signature = signature.replace(parameters=params, return_annotation=return_annotation) + self.docstring = ast.get_docstring(node) + + def __str__(self): + sio = StringIO() + + if self.docstring: + sio.write(f'def {self.name}{self.signature}:\n') + sio.write(self.indent(str(self.docstring))) + sio.write(self.indent('...')) + else: + sio.write(f'def {self.name}{self.signature}: ...') + + return sio.getvalue() + + def __iter__(self): + if self.docstring: + yield self.docstring + + for param in self.signature.parameters.values(): + if param.annotation is not inspect.Parameter.empty: + yield param.annotation + + if self.signature.return_annotation is not inspect.Parameter.empty: + yield self.signature.return_annotation + + +class ClassMethodDef(FunctionDef): + + def __str__(self): + return f'@classmethod\n{super().__str__()}' + + +class StaticMethodDef(FunctionDef): + + def __str__(self): + return f'@staticmethod\n{super().__str__()}' diff --git a/tools/stubgen/definitions/module_def.py b/tools/stubgen/definitions/module_def.py new file mode 100644 index 00000000..2efac185 --- /dev/null +++ b/tools/stubgen/definitions/module_def.py @@ -0,0 +1,106 @@ + +import 
inspect +from collections import defaultdict +from io import StringIO +from typing import List, get_type_hints + +from ..common import Node, BaseDefinition, BaseASTBuilder + +from ..literals.reference_literal import ReferenceLiteral +from ..literals.type_hint_literal import TypeHintLiteral + + +class ModuleDef(BaseDefinition): + + # TODO: support imported functions + # TODO: support imported classes + + def __init__(self, node: Node, ast: BaseASTBuilder): + super().__init__(node, ast) + + self.docstring = self.ast.get_docstring(self.node) + + self.members = {} + for member_name in self.get_public_member_names(): + # self.members[member_name] = self.get_member_rep(member_name) + + node = self.node.get_member(member_name) + member = node.obj + + if inspect.isfunction(member): + definition = self.ast.get_function_definition(node) + elif inspect.isclass(member) and member.__module__ == self.ast.module_name: + definition = self.ast.get_class_definition(node) + else: + definition = self.ast.get_attribute_definition(node) + + self.members[member_name] = definition + + self.annotations = {} + for member_name, annotation in get_type_hints(self.obj).items(): + self.annotations[member_name] = self.get_annotation_rep(member_name) + + def __str__(self): + # TODO: print __all__ if present + + sio = StringIO() + + imports, from_imports = self.get_imports() + if imports: + for name in sorted(imports): + sio.write(f'import {name}\n') + sio.write('\n') + + if from_imports: + for key in sorted(from_imports.keys()): + names = ', '.join( + f'{name} as {import_as}' if import_as else name + for name, import_as in from_imports[key] + ) + sio.write(f'from {key} import {names}\n') + sio.write('\n') + + if self.docstring: + sio.write(str(self.docstring)) + + if self.annotations: + for name, annotation in self.annotations.items(): + sio.write(f'{annotation}\n') + sio.write('\n') + + if self.members: + for name, rep in self.members.items(): + sio.write(f'{rep}\n\n') + + return sio.getvalue() + + def __iter__(self): + if self.docstring: + yield self.docstring + + yield from self.members.values() + yield from self.annotations.values() + + def get_imports(self): + imports = set() + from_imports = defaultdict(set) + + for curr in self.traverse(): + if isinstance(curr, TypeHintLiteral): + from_imports['typing'].add((curr.type_hint_name, None)) + + if isinstance(curr, ReferenceLiteral): + if inspect.ismodule(curr.obj): + if curr.obj.__name__ != 'builtins': + imports.add(curr.obj.__name__) + elif inspect.isclass(curr.obj): + # TODO: check if a class is actually defined outside of our module + if curr.obj.__module__ != 'builtins': + from_imports[curr.obj.__module__].add((curr.obj.__qualname__.split('.')[0], None)) + + return imports, from_imports + + def get_public_member_names(self) -> List[str]: + if hasattr(self.obj, '__all__'): + return list(self.obj.__all__) + return [name for name in dir(self.obj) if not name.startswith('_')] diff --git a/tools/stubgen/import_.py b/tools/stubgen/import_.py new file mode 100644 index 00000000..e453066c --- /dev/null +++ b/tools/stubgen/import_.py @@ -0,0 +1,92 @@ +import os +import sys +from importlib import import_module +from importlib.abc import MetaPathFinder +from importlib.machinery import ModuleSpec +from importlib.util import find_spec, resolve_name, spec_from_file_location +from pkgutil import walk_packages +from typing import Optional, Tuple +from _frozen_importlib_external import _NamespaceLoader + + +class SourceFinder(MetaPathFinder): + + def __init__(self, module_root, 
sources_path): + self.sources_path = sources_path + self.module_root = module_root + + def find_spec(self, fullname, path, target=None): + + # To absolute import + fullname = resolve_name(fullname, path) + + # Checking if fullname is module_root or its submodule + if fullname == self.module_root: + path_prefix = self.sources_path + elif fullname.startswith(self.module_root + '.'): + tokens = fullname[len(self.module_root) + 1:].split('.') + path_prefix = os.path.join(self.sources_path, *tokens) + else: + return None + + # Trying to guess a file + if os.path.exists(path_prefix + '.py'): + path = path_prefix + '.py' + elif os.path.exists(os.path.join(path_prefix, '__init__.py')): + path = os.path.join(path_prefix, '__init__.py') + else: + return None + + # Creating spec from a file + return spec_from_file_location(fullname, path) + + +class VirtualPackageFinder(MetaPathFinder): + + def __init__(self, module_root): + self.module_root = module_root + + def find_spec(self, fullname, path, target=None): + if self.module_root.startswith(fullname + '.'): + name = fullname.split('.')[0] + loader = _NamespaceLoader(name, path, self) + return ModuleSpec(name=name, loader=loader, is_package=True) + + return None + + +def override_module_import_path(module, sources_path): + sys.meta_path.insert(0, SourceFinder(module, sources_path)) + sys.meta_path.append(VirtualPackageFinder(module)) + + +def traverse_modules(module_root, sources_path): + yield module_root, import_module(module_root) + for _, module_name, _ in walk_packages([sources_path], prefix=module_root + '.'): + yield module_name, import_module(module_name) + + +def split_qualname(qualname) -> Tuple[str, Optional[str]]: + """ + Splits a qualname into a module_name and a nested_name + Args: + qualname: a __qualname__ of an object + + Returns: A tuple of module_name and a nested_name where + * module_name is the largest subqualname that resolves to a module + * nested_name is the rest of the qualname or None if module_name is qualname + """ + + module_name = qualname + while module_name: + try: + find_spec(module_name) + except ImportError: + module_name = module_name[:module_name.rfind('.')] + else: + break + + if not module_name: + raise ValueError(f'Qualname {qualname} does not start with an importable module name') + + return module_name, qualname[len(module_name) + 1:] or None
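# Usage sketch for the import machinery above, mirroring tools/gen_stubs.py
# (the 'crowdlib'/'src' values here are illustrative assumptions):
#
#     override_module_import_path('crowdlib', 'src')
#     for module_name, module in traverse_modules('crowdlib', 'src'):
#         ...  # e.g. hand each module to ASTBuilder and write module.__file__ + 'i'
#
# override_module_import_path() puts SourceFinder at the front of sys.meta_path,
# so imports of the module root resolve against the given source tree, and
# appends VirtualPackageFinder as a fallback that fabricates empty namespace
# packages for any missing parents of the module root.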
diff --git a/tools/stubgen/literals/__init__.py b/tools/stubgen/literals/__init__.py new file mode 100644 index 00000000..37dbfbd8 --- /dev/null +++ b/tools/stubgen/literals/__init__.py @@ -0,0 +1,8 @@ +"""Module contains classes representing values""" + +__all__ = ['AnnotationLiteral', 'ReferenceLiteral', 'TypeHintLiteral', 'ValueLiteral'] + +from .annotation_literal import AnnotationLiteral +from .reference_literal import ReferenceLiteral +from .type_hint_literal import TypeHintLiteral +from .value_literal import ValueLiteral diff --git a/tools/stubgen/literals/annotation_literal.py b/tools/stubgen/literals/annotation_literal.py new file mode 100644 index 00000000..c283a0f3 --- /dev/null +++ b/tools/stubgen/literals/annotation_literal.py @@ -0,0 +1,12 @@ +from .value_literal import ValueLiteral + + +class AnnotationLiteral(ValueLiteral): + + def __str__(self): + if self.obj is type(None): # noqa: E721 + return 'None' + + return super().__str__() + + __repr__ = __str__ diff --git a/tools/stubgen/literals/reference_literal.py b/tools/stubgen/literals/reference_literal.py new file mode 100644 index 00000000..bd470cf9 --- /dev/null +++ b/tools/stubgen/literals/reference_literal.py @@ -0,0 +1,21 @@ +import inspect + +from ..common import BaseLiteral, BaseASTBuilder + + +class ReferenceLiteral(BaseLiteral): + + def __init__(self, obj, ast: BaseASTBuilder): + assert inspect.isclass(obj) or inspect.ismodule(obj) + super().__init__(obj, ast) + + def __str__(self): + if inspect.isclass(self.obj): + return str(self.obj.__qualname__.split('.')[0]) + + return self.obj.__name__ + + __repr__ = __str__ + + def __iter__(self): + yield from () diff --git a/tools/stubgen/literals/type_hint_literal.py b/tools/stubgen/literals/type_hint_literal.py new file mode 100644 index 00000000..c3365fc7 --- /dev/null +++ b/tools/stubgen/literals/type_hint_literal.py @@ -0,0 +1,32 @@ +from typing import get_args + +from ..common import BaseLiteral, BaseASTBuilder + + +class TypeHintLiteral(BaseLiteral): + """Represents a type hint""" + + def __init__(self, obj, ast: BaseASTBuilder): + super().__init__(obj, ast) + + name = obj._name or obj.__origin__._name + args = get_args(obj) + + if name == 'Union' and len(args) == 2 and args[-1] is type(None): # noqa: E721 + name = 'Optional' + args = args[:-1] + + self.type_hint_name = name + self.type_hint_args = [self.ast.get_literal(arg) for arg in args] + + def __str__(self) -> str: + if self.type_hint_args: + args = ', '.join(str(arg) for arg in self.type_hint_args) + return f'{self.type_hint_name}[{args}]' + + return self.type_hint_name + + __repr__ = __str__ + + def __iter__(self): + yield from self.type_hint_args diff --git a/tools/stubgen/literals/value_literal.py b/tools/stubgen/literals/value_literal.py new file mode 100644 index 00000000..302eedd5 --- /dev/null +++ b/tools/stubgen/literals/value_literal.py @@ -0,0 +1,18 @@ +from ..common import BaseLiteral + + +class ValueLiteral(BaseLiteral): + + def __str__(self): + if self.obj is None: + return 'None' + + if isinstance(self.obj, str): + return repr(self.obj) + + return '...' + + __repr__ = __str__ + + def __iter__(self): + yield from () diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..aeab89f3 --- /dev/null +++ b/tox.ini @@ -0,0 +1,29 @@ +[tox] +minversion = 3.3.0 +envlist = py37, py38, py39 +isolated_build = True +requires = setuptools >= 36.2.0 + +[gh-actions] +python = + 3.7: py37 + 3.8: py38 + 3.9: py39 + +[testenv] +deps = + pytest +commands = + pytest + +[testenv:release] +basepython = python3.8 +deps = + build + twine +passenv = + TWINE_USERNAME + TWINE_PASSWORD +commands = + python -m build --sdist --wheel . + twine upload dist/*
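A minimal usage sketch of the API exercised by the tests above (assuming the
package is installed as `crowd-kit` and imported as `crowdkit`; the data below
is invented):

    import pandas as pd

    from crowdkit.aggregation import DawidSkene, MajorityVote
    from crowdkit.aggregation.utils import evaluate

    # Worker answers in the long format used throughout the tests.
    answers = pd.DataFrame(
        [
            ['w1', 't1', 'yes'],
            ['w2', 't1', 'no'],
            ['w3', 't1', 'yes'],
            ['w1', 't2', 'no'],
            ['w2', 't2', 'no'],
            ['w3', 't2', 'yes'],
        ],
        columns=['performer', 'task', 'label'],
    )

    # Majority vote picks the most frequent label per task.
    mv_labels = MajorityVote().fit_predict(answers)

    # Dawid-Skene with 10 EM iterations, as in test_ds_aggregation.py.
    ds_labels = DawidSkene(10).fit_predict(answers)

    # Share of tasks where the aggregate matches the known ground truth.
    ground_truth = pd.DataFrame([['t1', 'yes'], ['t2', 'no']], columns=['task', 'label'])
    print(evaluate(ground_truth, ds_labels))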