Skip to content

Commit

Permalink
Packaging, optional dependencies (#11)
Browse files Browse the repository at this point in the history
- Using pyproject.toml to define a package. 
- Optional dependencies like Haystack, DeepEval (+Langchain), Docs creation.
- Update README with instructions.
  • Loading branch information
danielfleischer authored Sep 4, 2024
1 parent 1e34ee0 commit 064fca5
Show file tree
Hide file tree
Showing 10 changed files with 86 additions and 48 deletions.
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,16 @@ files.
Comments, suggestions, issues and pull requests are welcome! ❤️

### Installation
Clone locally and run:
Clone and run:

```sh
pip install -r requirements.txt
pip install -e .
```

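Optional packages can be installed as extras (quote the argument so that shells such as zsh do not interpret the square brackets as a glob pattern):
```sh
pip install -e ".[haystack]"
pip install -e ".[deepeval]"
```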

### Quick Start
Expand Down
2 changes: 1 addition & 1 deletion docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
Install python packages required for building mkdocs documentation website.

``` sh
pip install -r docs/requirements.txt
pip install -e ".[docs]"
```

## Adding new content
Expand Down
10 changes: 8 additions & 2 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,16 @@ files.
Comments, suggestions, issues and pull requests are welcome! ❤️

### Installation
Clone locally and run:
Clone and run:

```sh
pip install -r requirements.txt
pip install -e .
```

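Optional packages can be installed as extras (quote the argument so that shells such as zsh do not interpret the square brackets as a glob pattern):
```sh
pip install -e ".[haystack]"
pip install -e ".[deepeval]"
```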

### Quick Start
Expand Down
7 changes: 0 additions & 7 deletions docs/requirements.txt

This file was deleted.

51 changes: 51 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
[project]
name = "ragfoundry"
version = "1.1.1"
description = "Framework for enhancing LLMs for RAG tasks using fine-tuning."
readme = "README.md"
license = {file = "LICENSE"}
requires-python = ">=3.10"
dependencies = [
"bert-score>=0.3.13",
"bitsandbytes==0.42.0",
"datasets==2.16.1",
"evaluate==0.4.1",
"hydra-core==1.3.2",
"nltk==3.9",
"openai==1.23.3",
"peft==0.11.1",
"pyyaml==6.0.1",
"rouge-score==0.1.2",
"sentence-transformers==2.4.0",
"sentencepiece==0.2.0",
"torch==2.2.0",
"transformers==4.42.3",
"trl==0.8.6",
"wandb==0.16.4",
]

[project.urls]
Homepage = "https://github.com/IntelLabs/RAGFoundry"
Documentation = "https://intellabs.github.io/RAGFoundry/"

[tool.setuptools]
packages = ["ragfoundry"]

[project.optional-dependencies]
deepeval = [
"deepeval==0.21.73",
]
haystack = [
"haystack-ai==2.3.1",
"qdrant-haystack>=5.0.0",
]
docs = [
"mkdocs-gen-files>=0.5.0",
"mkdocs-material-extensions>=1.3.1",
"mkdocs-material>=9.5.30",
"mkdocstrings-python-legacy>=0.2.3",
"mkdocstrings-python>=1.10.7",
"mkdocstrings>=0.25.2",
"pymdown-extensions>=10.9",
]

13 changes: 7 additions & 6 deletions ragfoundry/evaluation/deep.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import logging
import math

from deepeval.test_case import LLMTestCase
from langchain_openai import AzureChatOpenAI

from .base import MetricBase


Expand All @@ -23,9 +20,13 @@ def __init__(
**kwargs,
):
super().__init__(key_names, **kwargs)
from deepeval.test_case import LLMTestCase
from langchain_openai import AzureChatOpenAI

self.local = True
self.query = self.key_names["query"]
self.context = self.key_names["context"]
self.test_case = LLMTestCase

self.model = AzureChatOpenAI(
api_version=api_version,
Expand Down Expand Up @@ -54,7 +55,7 @@ def measure(self, example):
output = example[self.field]
context = example[self.context]

test_case = LLMTestCase(
test_case = self.test_case(
input=query,
actual_output=output or "No answer.",
retrieval_context=[context] if isinstance(context, str) else context,
Expand Down Expand Up @@ -92,7 +93,7 @@ def measure(self, example):
output = example[self.field]
context = example[self.context]

test_case = LLMTestCase(
test_case = self.test_case(
input=query,
actual_output=output or "No answer.",
retrieval_context=[context] if isinstance(context, str) else context,
Expand Down Expand Up @@ -126,7 +127,7 @@ def measure(self, example):
output = example[self.field]
context = example[self.context]

test_case = LLMTestCase(
test_case = self.test_case(
input="",
actual_output=output,
context=[context] if isinstance(context, str) else context,
Expand Down
13 changes: 8 additions & 5 deletions ragfoundry/evaluation/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@
import string
from collections import Counter, defaultdict

import evaluate
import sklearn

from .base import MetricBase


Expand All @@ -19,6 +16,8 @@ def __init__(self, key_names, metric_names: list[str], **kwargs):
key_names (dict): A dictionary containing the field names.
metric_names (list[str]): A list of metric names.
"""
import evaluate

super().__init__(key_names, **kwargs)
self.metric_names = metric_names
self.metric = evaluate.combine(metric_names)
Expand Down Expand Up @@ -63,10 +62,14 @@ class Classification(MetricBase):
def __init__(
self, key_names: dict, mapping: dict, else_value: int = 2, **kwargs
) -> None:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

super().__init__(key_names, **kwargs)
self.local = False
self.mapping = mapping
self.else_value = else_value
self.precision_recall_fn = precision_recall_fscore_support
self.accuracy_fn = accuracy_score

def measure(self, example: dict):
inputs = example[self.field]
Expand All @@ -83,10 +86,10 @@ def measure(self, example: dict):
self.mapping.get(normalize_text(t).strip(), self.else_value) for t in targets
]

precision, recall, f1, _ = sklearn.metrics.precision_recall_fscore_support(
precision, recall, f1, _ = self.precision_recall_fn(
targets, inputs, average="macro"
)
accuracy = sklearn.metrics.accuracy_score(targets, inputs)
accuracy = self.accuracy_fn(targets, inputs)

return {
"accuracy": float(accuracy),
Expand Down
2 changes: 1 addition & 1 deletion ragfoundry/processing/global_steps/sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,5 +101,5 @@ def __init__(self, k, output_key="fewshot", input_dataset=None, **kwargs):
output_key=output_key,
input_key=None,
input_dataset=input_dataset,
**kwargs
**kwargs,
)
4 changes: 2 additions & 2 deletions ragfoundry/processing/local_steps/retrievers/haystack.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from haystack import Pipeline

from ...step import LocalStep


Expand All @@ -10,6 +8,8 @@ class HaystackRetriever(LocalStep):

def __init__(self, pipeline_or_yaml_path, docs_key, query_key, **kwargs):
super().__init__(**kwargs)
from haystack import Pipeline

if isinstance(pipeline_or_yaml_path, str):
self.pipe = Pipeline.load(open(pipeline_or_yaml_path))
else:
Expand Down
22 changes: 0 additions & 22 deletions requirements.txt

This file was deleted.

0 comments on commit 064fca5

Please sign in to comment.