diff --git a/llments/eval/toxicity.py b/llments/eval/toxicity.py
new file mode 100644
index 0000000..bfa28a6
--- /dev/null
+++ b/llments/eval/toxicity.py
@@ -0,0 +1,183 @@
+"""Evaluators for toxicity analysis."""
+
+import json
+import time
+
+import requests  # type: ignore
+import tqdm
+
+from llments.eval.eval import EvalContext, Evaluator
+
+
+class ToxicityEvaluator(Evaluator):
+    """A class that defines an evaluation function for toxicity analysis."""
+
+    def __init__(self, api_key: str):
+        """Initializes the ToxicityEvaluator class.
+
+        Args:
+            api_key (str): API key for accessing the Perspective API.
+        """
+        self.api_key = api_key
+        self.url = f"https://commentanalyzer.googleapis.com/v1alpha1/comments:analyze?key={api_key}"
+        self.categories = [
+            "toxicity",
+            "severe_toxicity",
+            "insult",
+            "flirtation",
+            "profanity",
+            "sexually_explicit",
+            "threat",
+            "identity_attack",
+        ]
+        self.languages = ["en"]
+
+    def _get_toxicity_scores(self, text: str) -> dict[str, float]:
+        """Helper method to get toxicity scores for a single text input from the API.
+
+        Args:
+            text (str): The text to evaluate.
+
+        Returns:
+            dict[str, float]: A dictionary containing toxicity scores for various categories.
+        """
+        if len(text.strip()) == 0:
+            # Return zeros for empty text
+            return {category: 0.0 for category in self.categories}
+
+        data_dict = {
+            "comment": {"text": text},
+            "languages": self.languages,
+            "requestedAttributes": {cat.upper(): {} for cat in self.categories},
+        }
+
+        # Make the API request
+        response = requests.post(url=self.url, data=json.dumps(data_dict))
+        response_dict = json.loads(response.content)
+
+        # Return the scores, or zeros if the response has no attribute scores
+        if "attributeScores" in response_dict:
+            scores = {}
+            for category in self.categories:
+                score = response_dict["attributeScores"][category.upper()][
+                    "spanScores"
+                ][0]["score"]["value"]
+                scores[category] = score
+            return scores
+        else:
+            # Return zeros for invalid responses
+            return {category: 0.0 for category in self.categories}
+
+    # Override the evaluate method from the Evaluator class.
+    # By design, evaluate() and evaluate_batch() only support the "toxicity" metric.
+    def evaluate(self, hyp: str, context: EvalContext | None = None) -> float:
+        """Returns the toxicity score for a given hypothesis.
+
+        Args:
+            hyp (str): The hypothesized string (e.g. a system output).
+            context (EvalContext | None): Unused; must be None. Use
+                evaluate_multiple() to request metrics other than toxicity.
+
+        Returns:
+            float: The toxicity score, usually between 0 and 1 inclusive.
+        """
+        if context is not None:
+            raise ValueError(
+                "This method only supports the 'toxicity' metric. "
+                "Please use evaluate_multiple() for other metrics."
+            )
+
+        return self._get_toxicity_scores(hyp)["toxicity"]
+
+    # Override the evaluate_batch method from the Evaluator class.
+    def evaluate_batch(
+        self,
+        hyps: list[str],
+        contexts: list[EvalContext] | None = None,
+        minibatch_size: int | None = None,
+        show_progress: bool = False,
+    ) -> list[float]:
+        """Evaluate the toxicity of many hypotheses at once.
+
+        Args:
+            hyps (list[str]): A list of hypothesized strings (e.g. system outputs).
+            contexts (list[EvalContext] | None): Unused; must be None. Use
+                evaluate_batch_multiple() to request metrics other than toxicity.
+            minibatch_size (int | None): Unused; the API is queried one comment
+                at a time.
+            show_progress (bool): Whether to show a progress bar.
+
+        Returns:
+            list[float]: A list of toxicity scores, usually between 0 and 1 inclusive.
+        """
+        if contexts is not None:
+            # Reject non-None contexts; this method only scores toxicity.
+            raise ValueError(
+                "This method only supports the 'toxicity' metric. "
+                "Please use evaluate_batch_multiple() for other metrics."
+            )
+
+        if show_progress:
+            hyps = tqdm.tqdm(hyps, desc="Evaluating")
+
+        res = []
+        for hyp in hyps:
+            # Sleep between requests to avoid Perspective API rate limiting.
+            time.sleep(1.2)
+            res.append(self.evaluate(hyp))
+        return res
+
+    # Override the evaluate_multiple method from the Evaluator class;
+    # the default metric is "toxicity".
+    def evaluate_multiple(
+        self,
+        hyp: str,
+        metrics: list[str] = ["toxicity"],
+        show_progress: bool = False,
+    ) -> list[float]:
+        """Evaluate multiple metrics at once.
+
+        Args:
+            hyp (str): The hypothesized string.
+            metrics (list[str]): A list of metrics to evaluate.
+            show_progress (bool): Whether to show a progress bar (unused for a
+                single hypothesis).
+
+        Returns:
+            list[float]: A list of evaluation scores, one per requested metric,
+                usually between 0 and 1 inclusive.
+        """
+        for metric in metrics:
+            if metric not in self.categories:
+                raise ValueError(f"Invalid metric: {metric}")
+
+        # Get the scores for all categories in a single API call.
+        scores = self._get_toxicity_scores(hyp)
+
+        # Return the scores for the requested metrics only.
+        return [scores[metric] for metric in metrics]
+
+    def evaluate_batch_multiple(
+        self,
+        hyps: list[str],
+        metrics: list[str] = ["toxicity"],
+        show_progress: bool = False,
+    ) -> list[list[float]]:
+        """Evaluate multiple metrics for many hypotheses at once.
+
+        Args:
+            hyps (list[str]): A list of hypothesized strings.
+            metrics (list[str]): A list of metrics to evaluate.
+            show_progress (bool): Whether to show a progress bar.
+
+        Returns:
+            list[list[float]]: A list of lists of evaluation scores, usually between 0 and 1 inclusive.
+        """
+        if show_progress:
+            hyps = tqdm.tqdm(hyps, desc="Evaluating")
+
+        res = []
+        for hyp in hyps:
+            # Sleep between requests to avoid Perspective API rate limiting.
+            time.sleep(1.2)
+            res.append(self.evaluate_multiple(hyp, metrics))
+
+        return res
diff --git a/llments/lm/base/dataset_ml.py b/llments/lm/base/dataset_lm.py
similarity index 100%
rename from llments/lm/base/dataset_ml.py
rename to llments/lm/base/dataset_lm.py