diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d319354..b3f6b74 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,5 +1,6 @@ +--- name: Continuous Integration -on: # yamllint disable-line rule:truthy +on: # yamllint disable-line rule:truthy push: branches: - main @@ -18,7 +19,7 @@ jobs: - name: Install ruff run: pip install ruff - name: Run ruff - run: ruff --check . + run: ruff check . python-mypy: runs-on: ubuntu-latest steps: @@ -31,14 +32,6 @@ jobs: uses: pre-commit/action@v3.0.0 with: extra_args: mypy --all-files - lint-requirements-txt: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: lint - uses: pre-commit/action@v3.0.0 - with: - extra_args: requirements-txt-fixer --all-files lint-markdown: runs-on: ubuntu-latest steps: @@ -75,19 +68,3 @@ jobs: uses: pre-commit/action@v3.0.0 with: extra_args: shellcheck --all-files - lint-trailing-whitespace: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: lint - uses: pre-commit/action@v3.0.0 - with: - extra_args: trailing-whitespace --all-files - lint-eof-newline: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: lint - uses: pre-commit/action@v3.0.0 - with: - extra_args: end-of-file-fixer --all-files diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 168d102..36cd252 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,10 +1,9 @@ +--- name: Release workflow - -on: +on: # yamllint disable-line rule:truthy push: tags: - "v[0123456789].*" - jobs: release: runs-on: ubuntu-latest diff --git a/.gitignore b/.gitignore index 477bb4f..b148a3e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,4 @@ __pycache__ build llments.egg-info .vscode -.pytest_cache \ No newline at end of file +.pytest_cache diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 053b197..53334c9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,19 +1,20 @@ +--- repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.5 + rev: v0.2.1 hooks: - id: ruff - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v1.6.1' + rev: v1.8.0 hooks: - - id: mypy + - id: mypy - repo: https://github.com/DavidAnson/markdownlint-cli2 - rev: v0.10.0 + rev: v0.12.1 hooks: - id: markdownlint-cli2 - repo: https://github.com/adrienverge/yamllint - rev: v1.33.0 + rev: v1.34.0 hooks: - id: yamllint - repo: https://github.com/koalaman/shellcheck-precommit diff --git a/README.md b/README.md index b31b564..ecb4f15 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,17 @@ ## Development Information +To start developing, install the development dependencies and pre-commit hooks: + ```bash -pip install . +pip install ".[dev]" pre-commit install ``` + +There are several [pre-commit hooks](https://pre-commit.com/) that will run on every +commit to perform formatting, typing, and linting. + +* `ruff` - Runs formatting, import sorting, and linting. +* `mypy` - Runs type checking. +* `markdownlint` - Runs markdown linting. +* `yamllint` - Runs YAML linting. diff --git a/llments/distance/distance.py b/llments/distance/distance.py index f327d9b..54466bd 100644 --- a/llments/distance/distance.py +++ b/llments/distance/distance.py @@ -1,12 +1,10 @@ - import abc from llments.lm.lm import LanguageModel class Distance: - - @abc.abstractclassmethod + @abc.abstractmethod def distance(self, lm1: LanguageModel, lm2: LanguageModel) -> float: """Returns a distance between two language models.""" ... diff --git a/llments/distance/norm.py b/llments/distance/norm.py index 1000731..0257821 100644 --- a/llments/distance/norm.py +++ b/llments/distance/norm.py @@ -1,4 +1,3 @@ - from llments.lm.lm import LanguageModel diff --git a/llments/eval/eval.py b/llments/eval/eval.py index 00eddb7..2e9acfa 100644 --- a/llments/eval/eval.py +++ b/llments/eval/eval.py @@ -1,4 +1,3 @@ - import abc import dataclasses @@ -6,7 +5,7 @@ class PairwiseEvaluator: """A class that defines an evaluation function, assessing a hypothesized string.""" - @abc.abstractclassmethod + @abc.abstractmethod def evaluate(self, hyp: str, ref: str) -> float: """Returns an evaluation score between 0 and 1 for two strings. @@ -25,7 +24,7 @@ class EvaluatorMetadata: class GeneralEvaluator: """A class that defines an evaluation function, assessing a hypothesized string.""" - @abc.abstractclassmethod + @abc.abstractmethod def evaluate(self, hyp: str, ref: EvaluatorMetadata) -> float: """Returns an evaluation score between 0 and 1 for two strings. diff --git a/llments/lm/empirical.py b/llments/lm/empirical.py index 5b10c00..417fed8 100644 --- a/llments/lm/empirical.py +++ b/llments/lm/empirical.py @@ -1,3 +1,4 @@ +from typing import Any from llments.lm.lm import LanguageModel import random import json @@ -10,10 +11,10 @@ def __init__(self, data: list[str], probs: list[float] | None = None): probs = [1 / len(data)] * len(data) self.data = pd.DataFrame({"text": data, "prob": probs}) - def sample(self, condition: str | None): + def generate(self, condition: str | None, **kwargs: Any) -> str: """Sample from the language model, possibly conditioned on a prefix.""" if condition is None: - return random.choice(self.data["text"], p=self.data["probs"]) + return random.choices(self.data["text"], weights=self.data["probs"])[0] else: # Filter to only those that start with the condition filtered_df = self.data[self.data["text"].str.startswith(condition)] @@ -24,9 +25,9 @@ def sample(self, condition: str | None): ) # Normalize the probabilities filtered_df["prob"] = filtered_df["prob"] / filtered_df["prob"].sum() - return random.choice(filtered_df["text"], p=filtered_df["probs"]) + return random.choices(filtered_df["text"], weights=filtered_df["probs"])[0] - def fit(self, target: LanguageModel): + def fit(self, target: LanguageModel, task_description: str | None = None): raise ValueError( "Cannot fit an empirical distribution to another distribution." ) diff --git a/llments/lm/lm.py b/llments/lm/lm.py index 54ff1eb..ead0ebb 100644 --- a/llments/lm/lm.py +++ b/llments/lm/lm.py @@ -2,7 +2,7 @@ class LanguageModel: - @abc.abstractclassmethod + @abc.abstractmethod def generate( self, condition: str | None, @@ -11,7 +11,7 @@ def generate( """Generate from the language model, possibly conditioned on a prefix.""" ... - @abc.abstractclassmethod + @abc.abstractmethod def fit( self, target: "LanguageModel", task_description: str | None = None ) -> "LanguageModel": diff --git a/llments/lm/lm_loader.py b/llments/lm/lm_loader.py index ef588c6..d15e476 100644 --- a/llments/lm/lm_loader.py +++ b/llments/lm/lm_loader.py @@ -1,5 +1,3 @@ - - from llments.lm.lm import LanguageModel diff --git a/llments/lm/lm_loader_test.py b/llments/lm/lm_loader_test.py index dd8fbf4..ee9b38b 100644 --- a/llments/lm/lm_loader_test.py +++ b/llments/lm/lm_loader_test.py @@ -1,4 +1,3 @@ - def test_load_from_spec_file(): """Test that load_from_specification_file() loads a language model.""" raise NotImplementedError