Packaging, optional dependencies (#11)

- Use pyproject.toml to define the package.
- Add optional dependency groups: Haystack, DeepEval (+LangChain), and documentation building.
- Update the README with installation instructions.
IntelLabs · Sep 4, 2024 · 064fca5 · 064fca5
1 parent 1e34ee0
commit 064fca5
Show file tree

Hide file tree

Showing 10 changed files with 86 additions and 48 deletions.
diff --git a/README.md b/README.md
@@ -15,10 +15,16 @@ files.
 Comments, suggestions, issues and pull-requests are welcomed! ❤️
 
 ### Installation
-Clone locally and run:
+Clone and run:
 
 ```sh
-pip install -r requirements.txt
+pip install -e .
+```
+
+Optional packages can be installed:
+```sh
+pip install -e ".[haystack]"
+pip install -e ".[deepeval]"
 ```
 
 ### Quick Start

diff --git a/docs/README.md b/docs/README.md
@@ -5,7 +5,7 @@
 Install python packages required for building mkdocs documentation website.
 
 ``` sh
-pip install -r docs/requirements.txt
+pip install -e ".[docs]"
 ```
 
 ## Adding new content

diff --git a/docs/index.md b/docs/index.md
@@ -15,10 +15,16 @@ files.
 Comments, suggestions, issues and pull-requests are welcomed! ❤️
 
 ### Installation
-Clone locally and run:
+Clone and run:
 
 ```sh
-pip install -r requirements.txt
+pip install -e .
+```
+
+Optional packages can be installed:
+```sh
+pip install -e ".[haystack]"
+pip install -e ".[deepeval]"
 ```
 
 ### Quick Start

diff --git a/docs/requirements.txt b/docs/requirements.txt
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,51 @@
+[project]
+name = "ragfoundry"
+version = "1.1.1"
+description = "Framework for enhancing LLMs for RAG tasks using fine-tuning."
+readme = "README.md"
+license = {file = "LICENSE"}
+requires-python = ">=3.10"
+dependencies = [
+    "bert-score>=0.3.13",
+    "bitsandbytes==0.42.0",
+    "datasets==2.16.1",
+    "evaluate==0.4.1",
+    "hydra-core==1.3.2",
+    "nltk==3.9",
+    "openai==1.23.3",
+    "peft==0.11.1",
+    "pyyaml==6.0.1",
+    "rouge-score==0.1.2",
+    "sentence-transformers==2.4.0",
+    "sentencepiece==0.2.0",
+    "torch==2.2.0",
+    "transformers==4.42.3",
+    "trl==0.8.6",
+    "wandb==0.16.4",
+]
+
+[project.urls]
+Homepage = "https://github.com/IntelLabs/RAGFoundry"
+Documentation = "https://intellabs.github.io/RAGFoundry/"
+
+[tool.setuptools]
+packages = ["ragfoundry"]
+
+[project.optional-dependencies]
+deepeval = [
+    "deepeval==0.21.73",
+]
+haystack = [
+    "haystack-ai==2.3.1",
+    "qdrant-haystack>=5.0.0",
+]
+docs = [
+    "mkdocs-gen-files>=0.5.0",
+    "mkdocs-material-extensions>=1.3.1",
+    "mkdocs-material>=9.5.30",
+    "mkdocstrings-python-legacy>=0.2.3",
+    "mkdocstrings-python>=1.10.7",
+    "mkdocstrings>=0.25.2",
+    "pymdown-extensions>=10.9",
+]
+
diff --git a/ragfoundry/evaluation/deep.py b/ragfoundry/evaluation/deep.py
@@ -1,9 +1,6 @@
 import logging
 import math
 
-from deepeval.test_case import LLMTestCase
-from langchain_openai import AzureChatOpenAI
-
 from .base import MetricBase
 
 
@@ -23,9 +20,13 @@ def __init__(
         **kwargs,
     ):
         super().__init__(key_names, **kwargs)
+        from deepeval.test_case import LLMTestCase
+        from langchain_openai import AzureChatOpenAI
+
         self.local = True
         self.query = self.key_names["query"]
         self.context = self.key_names["context"]
+        self.test_case = LLMTestCase
 
         self.model = AzureChatOpenAI(
             api_version=api_version,
@@ -54,7 +55,7 @@ def measure(self, example):
         output = example[self.field]
         context = example[self.context]
 
-        test_case = LLMTestCase(
+        test_case = self.test_case(
             input=query,
             actual_output=output or "No answer.",
             retrieval_context=[context] if isinstance(context, str) else context,
@@ -92,7 +93,7 @@ def measure(self, example):
         output = example[self.field]
         context = example[self.context]
 
-        test_case = LLMTestCase(
+        test_case = self.test_case(
             input=query,
             actual_output=output or "No answer.",
             retrieval_context=[context] if isinstance(context, str) else context,
@@ -126,7 +127,7 @@ def measure(self, example):
         output = example[self.field]
         context = example[self.context]
 
-        test_case = LLMTestCase(
+        test_case = self.test_case(
             input="",
             actual_output=output,
             context=[context] if isinstance(context, str) else context,

diff --git a/ragfoundry/evaluation/metrics.py b/ragfoundry/evaluation/metrics.py
@@ -2,9 +2,6 @@
 import string
 from collections import Counter, defaultdict
 
-import evaluate
-import sklearn
-
 from .base import MetricBase
 
 
@@ -19,6 +16,8 @@ def __init__(self, key_names, metric_names: list[str], **kwargs):
             key_names (dict): A dictionary containing the field names.
             metric_names (list[str]): A list of metric names.
         """
+        import evaluate
+
         super().__init__(key_names, **kwargs)
         self.metric_names = metric_names
         self.metric = evaluate.combine(metric_names)
@@ -63,10 +62,14 @@ class Classification(MetricBase):
     def __init__(
         self, key_names: dict, mapping: dict, else_value: int = 2, **kwargs
     ) -> None:
+        from sklearn.metrics import accuracy_score, precision_recall_fscore_support
+
         super().__init__(key_names, **kwargs)
         self.local = False
         self.mapping = mapping
         self.else_value = else_value
+        self.precision_recall_fn = precision_recall_fscore_support
+        self.accuracy_fn = accuracy_score
 
     def measure(self, example: dict):
         inputs = example[self.field]
@@ -83,10 +86,10 @@ def measure(self, example: dict):
             self.mapping.get(normalize_text(t).strip(), self.else_value) for t in targets
         ]
 
-        precision, recall, f1, _ = sklearn.metrics.precision_recall_fscore_support(
+        precision, recall, f1, _ = self.precision_recall_fn(
             targets, inputs, average="macro"
         )
-        accuracy = sklearn.metrics.accuracy_score(targets, inputs)
+        accuracy = self.accuracy_fn(targets, inputs)
 
         return {
             "accuracy": float(accuracy),

diff --git a/ragfoundry/processing/global_steps/sampling.py b/ragfoundry/processing/global_steps/sampling.py
@@ -101,5 +101,5 @@ def __init__(self, k, output_key="fewshot", input_dataset=None, **kwargs):
             output_key=output_key,
             input_key=None,
             input_dataset=input_dataset,
-            **kwargs
+            **kwargs,
         )
diff --git a/ragfoundry/processing/local_steps/retrievers/haystack.py b/ragfoundry/processing/local_steps/retrievers/haystack.py
@@ -1,5 +1,3 @@
-from haystack import Pipeline
-
 from ...step import LocalStep
 
 
@@ -10,6 +8,8 @@ class HaystackRetriever(LocalStep):
 
     def __init__(self, pipeline_or_yaml_path, docs_key, query_key, **kwargs):
         super().__init__(**kwargs)
+        from haystack import Pipeline
+
         if isinstance(pipeline_or_yaml_path, str):
             self.pipe = Pipeline.load(open(pipeline_or_yaml_path))
         else:

diff --git a/requirements.txt b/requirements.txt