Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
534 changes: 277 additions & 257 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ openai = "^2.12.0"
pyarrow = "^22.0.0"
tiktoken = "^0.12.0"
uvicorn = ">=0.38,<0.41"
modellogger = {git = "https://github.com/mlcommons/modellogger.git", rev = "57e2028f2f1b3badba45ea0bb49a057e9a665df6"}

[tool.poetry.group.dev.dependencies]
pytest-datafiles = "^3.0.0"
Expand Down
4 changes: 2 additions & 2 deletions src/modelbench/benchmark_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@
from datetime import datetime
from typing import Any, Iterable, Optional, Sequence

from modellogger.log_config import get_logger
from pydantic import BaseModel
from tqdm import tqdm

from modelbench.benchmark_runner_items import ModelgaugeTestWrapper, TestRunItem, Timer
from modelbench.benchmarks import BenchmarkDefinition, BaseBenchmarkScore
from modelbench.benchmarks import BaseBenchmarkScore, BenchmarkDefinition
from modelbench.cache import DiskCache, MBCache
from modelbench.run_journal import RunJournal
from modelgauge.annotator import Annotator
from modelgauge.annotator_registry import ANNOTATORS
from modelgauge.base_test import PromptResponseTest, TestResult
from modelgauge.config import raise_if_missing_from_config
from modelgauge.log_config import get_logger
from modelgauge.monitoring import PROMETHEUS
from modelgauge.pipeline import NullCache, Pipe, Pipeline, Sink, Source
from modelgauge.pipeline_runner import PipelineRunner
Expand Down
10 changes: 3 additions & 7 deletions src/modelbench/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import faulthandler
import io
import json
import logging
import os

# silence Together's upgrade message, as the new library is not out of beta
Expand All @@ -18,22 +17,19 @@
import click
import termcolor
from click import echo
from modellogger.log_config import configure_logging
from rich.console import Console
from rich.table import Table

import modelgauge.annotators.cheval.registration # noqa: F401
from modelbench.benchmark_runner import BenchmarkRun, BenchmarkRunner, JsonRunTracker, TqdmRunTracker
from modelbench.benchmarks import GeneralPurposeAiChatBenchmarkV1, SecurityBenchmark
from modelbench.consistency_checker import (
ConsistencyChecker,
summarize_consistency_check_results,
)
from modelbench.consistency_checker import ConsistencyChecker, summarize_consistency_check_results
from modelbench.record import dump_json
from modelbench.standards import Standards
from modelgauge.config import load_secrets_from_config, write_default_config
from modelgauge.load_namespaces import load_namespaces
from modelgauge.locales import DEFAULT_LOCALE, LOCALES
from modelgauge.log_config import get_file_logging_handler
from modelgauge.monitoring import PROMETHEUS
from modelgauge.preflight import check_secrets, make_sut
from modelgauge.prompt_sets import GENERAL_PROMPT_SETS, SECURITY_JAILBREAK_PROMPT_SETS
Expand Down Expand Up @@ -142,7 +138,7 @@ def cli(ctx: click.Context, run_path) -> None:
log_dir = run_path / "logs"
log_dir.mkdir(exist_ok=True, parents=True)
filename = log_dir / f'modelbench-{datetime.now().strftime("%y%m%d-%H%M%S")}.log'
logging.basicConfig(level=logging.DEBUG, handlers=[get_file_logging_handler(filename)], force=True)
configure_logging(app_name="modelbench", log_file=filename)
write_default_config()
load_namespaces(disable_progress_bar=True)

Expand Down
8 changes: 3 additions & 5 deletions src/modelgauge/annotation_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
import time

from modellogger.log_config import get_logger
from pydantic import BaseModel

from modelgauge.annotator import Annotator
from modelgauge.dataset import AnnotationDataset, PromptResponseDataset
from modelgauge.log_config import get_logger
from modelgauge.pipeline import CachingPipe, Pipe, Sink, Source
from modelgauge.single_turn_prompt_response import (
AnnotatedSUTInteraction,
SUTInteraction,
)
from modelgauge.single_turn_prompt_response import AnnotatedSUTInteraction, SUTInteraction

logger = get_logger(__name__)

Expand Down
3 changes: 1 addition & 2 deletions src/modelgauge/annotators/llama_guard_annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@
from string import Template
from typing import Callable, Dict, List, Optional

from pydantic import BaseModel
from modellogger.log_config import get_logger

from modelgauge.annotation import SafetyAnnotation
from modelgauge.annotator import Annotator
from modelgauge.annotator_registry import ANNOTATORS
from modelgauge.log_config import get_logger
from modelgauge.prompt import ChatPrompt, TextPrompt
from modelgauge.secret_values import InjectSecret
from modelgauge.single_turn_prompt_response import TestItem
Expand Down
6 changes: 2 additions & 4 deletions src/modelgauge/cli.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import logging
import os
import pathlib
import warnings
from typing import Optional

import click
from modellogger.log_config import get_logger

from modelgauge.annotator import Annotator
from modelgauge.annotator_registry import ANNOTATORS
Expand Down Expand Up @@ -36,7 +36,7 @@
from modelgauge.sut_registry import SUTS
from modelgauge.test_registry import TESTS

logger = logging.getLogger(__name__)
logger = get_logger(__name__)


@cli.command(name="list")
Expand Down Expand Up @@ -338,8 +338,6 @@ def run_job(
If running a SUT, the file must have 'UID' and 'Text' columns. The output will be saved to a CSV file.
If running ONLY annotators, the file must have 'UID', 'Prompt', 'SUT', and 'Response' columns. The output will be saved to a json lines file.
"""
logging.basicConfig(level=logging.DEBUG if debug else logging.INFO)

# TODO: break this function up. It's branching too much
# make sure the job has everything it needs to run
secrets = load_secrets_from_config()
Expand Down
4 changes: 3 additions & 1 deletion src/modelgauge/command_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import sys

import click
from modellogger.log_config import configure_logging

from modelgauge.annotator_registry import ANNOTATORS
from modelgauge.cli_lazy import LOAD_ALL, LazyModuleImportGroup
from modelgauge.config import write_default_config
Expand All @@ -26,7 +28,7 @@
def cli():
"""Run the ModelGauge library from the command line."""
# To add a command, decorate your function with @cli.command().

configure_logging(app_name="modelgauge")
# Always create the config directory if it doesn't already exist.
write_default_config()

Expand Down
2 changes: 1 addition & 1 deletion src/modelgauge/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
from typing import Dict, Mapping, Sequence

import tomli
from modellogger.log_config import get_logger

from modelgauge import config_templates
from modelgauge.log_config import get_logger
from modelgauge.secret_values import MissingSecretValues, RawSecrets, SecretDescription

DEFAULT_CONFIG_DIR = "config"
Expand Down
3 changes: 1 addition & 2 deletions src/modelgauge/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@
import time
from typing import List, Optional, Set, Type, TypeVar

from modellogger.log_config import get_logger
from tqdm import tqdm

from modelgauge.log_config import get_logger

# Type vars helpful in defining templates.
_InT = TypeVar("_InT")

Expand Down
28 changes: 0 additions & 28 deletions src/modelgauge/log_config.py

This file was deleted.

14 changes: 5 additions & 9 deletions src/modelgauge/pipeline_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,14 @@
from abc import ABC, abstractmethod
from multiprocessing.pool import ThreadPool

from modelgauge.annotation_pipeline import (
AnnotatorAssigner,
AnnotatorSink,
AnnotatorSource,
AnnotatorWorkers,
)
from modellogger.log_config import get_logger

from modelgauge.annotation_pipeline import AnnotatorAssigner, AnnotatorSink, AnnotatorSource, AnnotatorWorkers
from modelgauge.dataset import AnnotationDataset, PromptDataset, PromptResponseDataset
from modelgauge.log_config import get_logger
from modelgauge.model_options import ModelOptions
from modelgauge.pipeline import Pipeline
from modelgauge.prompt_pipeline import PromptSink, PromptSource, PromptSutAssigner, PromptSutWorkers
from modelgauge.ready import ReadyResponses, Readyable
from modelgauge.model_options import ModelOptions
from modelgauge.ready import Readyable, ReadyResponses

logger = get_logger(__name__)

Expand Down
7 changes: 4 additions & 3 deletions src/modelgauge/prompt_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import time
from typing import Optional

from modellogger.log_config import get_logger

from modelgauge.dataset import PromptDataset, PromptResponseDataset
from modelgauge.log_config import get_logger
from modelgauge.model_options import ModelOptions
from modelgauge.pipeline import CachingPipe, Pipe, Sink, Source
from modelgauge.prompt import TextPrompt
from modelgauge.single_turn_prompt_response import SUTInteraction, TestItem
from modelgauge.sut import PromptResponseSUT, SUT, SUTResponse
from modelgauge.model_options import ModelOptions
from modelgauge.sut import SUT, PromptResponseSUT, SUTResponse

logger = get_logger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion src/modelgauge/retry_decorator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import functools
import time

from modelgauge.log_config import get_logger
from modellogger.log_config import get_logger

BASE_RETRY_COUNT = 3
MAX_RETRY_DURATION = 86400 # 1 day in seconds
Expand Down
15 changes: 5 additions & 10 deletions src/modelgauge/suts/google_genai.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,17 @@
from typing import Optional

from google import genai
from google.api_core.exceptions import (
InternalServerError,
ResourceExhausted,
RetryError,
TooManyRequests,
)
from google.genai.types import GenerateContentConfig, GenerateContentResponse, ThinkingConfig, FinishReason
from google.api_core.exceptions import InternalServerError, ResourceExhausted, RetryError, TooManyRequests
from google.genai.types import FinishReason, GenerateContentConfig, GenerateContentResponse, ThinkingConfig
from modellogger.log_config import get_logger
from pydantic import BaseModel

from modelgauge.general import APIException
from modelgauge.log_config import get_logger
from modelgauge.model_options import ModelOptions
from modelgauge.prompt import TextPrompt
from modelgauge.retry_decorator import retry
from modelgauge.secret_values import InjectSecret, loggable_secret, RequiredSecret, SecretDescription
from modelgauge.secret_values import InjectSecret, RequiredSecret, SecretDescription, loggable_secret
from modelgauge.sut import REFUSAL_RESPONSE, PromptResponseSUT, SUTResponse # usort: skip
from modelgauge.model_options import ModelOptions
from modelgauge.sut_capabilities import AcceptsTextPrompt
from modelgauge.sut_decorator import modelgauge_sut
from modelgauge.sut_registry import SUTS
Expand Down
11 changes: 4 additions & 7 deletions src/modelgauge/suts/huggingface_sut_factory.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
import logging

import huggingface_hub as hfh
from modelgauge.auth.huggingface_inference_token import HuggingFaceInferenceToken
from modelgauge.dynamic_sut_factory import (
DynamicSUTFactory,
ModelNotSupportedError,
ProviderNotFoundError,
)
from modellogger.log_config import get_logger

from modelgauge.log_config import get_logger
from modelgauge.auth.huggingface_inference_token import HuggingFaceInferenceToken
from modelgauge.dynamic_sut_factory import DynamicSUTFactory, ModelNotSupportedError, ProviderNotFoundError
from modelgauge.secret_values import InjectSecret, RawSecrets
from modelgauge.sut_definition import SUTDefinition
from modelgauge.suts.huggingface_chat_completion import (
Expand Down
6 changes: 3 additions & 3 deletions src/modelgauge/suts/together_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,21 @@
from typing import Any, List, Optional

import requests # type:ignore
from pydantic import BaseModel, Field
from modellogger.log_config import get_logger
from pydantic import BaseModel
from requests.adapters import HTTPAdapter, Retry # type:ignore

from modelgauge.auth.together_key import TogetherApiKey
from modelgauge.general import APIException
from modelgauge.log_config import get_logger
from modelgauge.prompt import ChatPrompt, ChatRole, TextPrompt
from modelgauge.prompt_formatting import format_chat
from modelgauge.tokenizer import GeneralTokenizer
from modelgauge.secret_values import InjectSecret
from modelgauge.sut import PromptResponseSUT, SUTResponse
from modelgauge.model_options import ModelOptions, TokenProbability, TopTokens
from modelgauge.sut_capabilities import AcceptsChatPrompt, AcceptsTextPrompt, ProducesPerTokenLogProbabilities
from modelgauge.sut_decorator import modelgauge_sut
from modelgauge.sut_registry import SUTS
from modelgauge.tokenizer import GeneralTokenizer

logger = get_logger(__name__)

Expand Down