Commit

Merge branch 'master' into v3.0-release
tomaarsen committed Jun 7, 2024
2 parents ba908be + f012ab3 commit 8a02e45
Showing 60 changed files with 585 additions and 348 deletions.
26 changes: 13 additions & 13 deletions docs/_static/html/models_en_sentence_embeddings.html
@@ -567,7 +567,7 @@
},
{
"name": "multi-qa-MiniLM-L6-dot-v1",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"base_model": '<a href="https://huggingface.co/nreimers/MiniLM-L6-H384-uncased" target="_blank">nreimers/MiniLM-L6-H384-uncased</a>',
"pooling": "CLS Pooling",
"training_data": "215M (question, answer) pairs from diverse sources.",
@@ -583,7 +583,7 @@
},
{
"name": "multi-qa-MiniLM-L6-cos-v1",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"base_model": '<a href="https://huggingface.co/nreimers/MiniLM-L6-H384-uncased" target="_blank">nreimers/MiniLM-L6-H384-uncased</a>',
"pooling": "Mean Pooling",
"training_data": "215M (question, answer) pairs from diverse sources.",
@@ -600,7 +600,7 @@
},
{
"name": "multi-qa-distilbert-dot-v1",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"base_model": '<a href="https://huggingface.co/distilbert-base" target="_blank">distilbert-base</a>',
"pooling": "CLS Pooling",
"training_data": "215M (question, answer) pairs from diverse sources.",
@@ -616,7 +616,7 @@
},
{
"name": "multi-qa-distilbert-cos-v1",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"base_model": '<a href="https://huggingface.co/distilbert-base" target="_blank">distilbert-base</a>',
"pooling": "Mean Pooling",
"training_data": "215M (question, answer) pairs from diverse sources.",
@@ -633,7 +633,7 @@
},
{
"name": "multi-qa-mpnet-base-dot-v1",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"base_model": '<a href="https://huggingface.co/microsoft/mpnet-base" target="_blank"microsoft/mpnet-base</a>',
"pooling": "CLS Pooling",
"training_data": "215M (question, answer) pairs from diverse sources.",
@@ -650,7 +650,7 @@
},
{
"name": "multi-qa-mpnet-base-cos-v1",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"base_model": '<a href="https://huggingface.co/microsoft/mpnet-base" target="_blank">microsoft/mpnet-base</a>',
"pooling": "Mean Pooling",
"training_data": "215M (question, answer) pairs from diverse sources.",
@@ -666,7 +666,7 @@
},
{
"name": "msmarco-distilbert-dot-v5",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages. It was trained on the MS MARCO passages dataset.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on the MS MARCO passages dataset.",
"base_model": '<a href="https://huggingface.co/distilbert-base" target="_blank">distilbert-base</a>',
"pooling": "Mean Pooling",
"training_data": "500k (query, answer) pairs from MS MARCO Passages dataset.",
@@ -682,7 +682,7 @@
},
{
"name": "msmarco-bert-base-dot-v5",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages. It was trained on the MS MARCO passages dataset.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on the MS MARCO passages dataset.",
"base_model": '<a href="https://huggingface.co/Luyu/co-condenser-marco" target="_blank">Luyu/co-condenser-marco</a>',
"pooling": "Mean Pooling",
"training_data": "500k (query, answer) pairs from MS MARCO Passages dataset.",
@@ -698,7 +698,7 @@
},
{
"name": "msmarco-distilbert-base-tas-b",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages. It was trained on the MS MARCO passages dataset.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on the MS MARCO passages dataset.",
"base_model": '<a href="https://huggingface.co/distilbert-base" target="_blank">distilbert-base</a>',
"pooling": "Mean Pooling",
"training_data": "500k (query, answer) pairs from MS MARCO Passages dataset.",
@@ -778,7 +778,7 @@
},
{
"name": "gtr-t5-base",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages.",
"base_model": '<a href="https://huggingface.co/t5-base" target="_blank">t5-base</a>',
"pooling": "Mean Pooling",
"training_data": "2B question-answer pairs from diverse online communities and then on MS-MARCO.",
@@ -794,7 +794,7 @@
},
{
"name": "gtr-t5-large",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages.",
"base_model": '<a href="https://huggingface.co/t5-large" target="_blank">t5-large</a>',
"pooling": "Mean Pooling",
"training_data": "2B question-answer pairs from diverse online communities and then on MS-MARCO.",
@@ -810,7 +810,7 @@
},
{
"name": "gtr-t5-xl",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages.",
"base_model": '<a href="https://huggingface.co/t5-3b" target="_blank">t5-3b</a>',
"pooling": "Mean Pooling",
"training_data": "2B question-answer pairs from diverse online communities and then on MS-MARCO.",
@@ -826,7 +826,7 @@
},
{
"name": "gtr-t5-xxl",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages.",
"base_model": '<a href="https://huggingface.co/t5-11b" target="_blank">t5-11b</a>',
"pooling": "Mean Pooling",
"training_data": "2B question-answer pairs from diverse online communities and then on MS-MARCO.",
2 changes: 1 addition & 1 deletion docs/_themes/sphinx_rtd_theme/layout.html
@@ -126,7 +126,7 @@
<a href="https://twitter.com/Nils_Reimers" target="_blank" title="Follow SBERT on Twitter"><img src="/_static/Twitter_Logo_White.svg" height="20" style="margin: 0px 10px 0px -10px;"> </a>
</div> -->
<div id="hf-button">
<a href="https://huggingface.co/models?library=sentence-transformers" target="_blank" title="See all Sentence Transformer models"><img src="{{ pathto('_static/img/hf-logo.svg', 1) }}" style="margin: 0px 10px 0px -10px; padding: 0px; height: 28px; width: 28px;"></a>
<a href="https://huggingface.co/models?library=sentence-transformers" target="_blank" title="See all Sentence Transformer models"><img src="{{ pathto('_static/hf-logo.svg', 1) }}" style="margin: 0px 10px 0px -10px; padding: 0px; height: 28px; width: 28px;"></a>
</div>
<div id="github-button"></div>
</div>
2 changes: 1 addition & 1 deletion docs/package_reference/sentence_transformer/trainer.md
@@ -7,5 +7,5 @@
.. autoclass:: sentence_transformers.trainer.SentenceTransformerTrainer
:members:
:inherited-members:
:exclude-members: autocast_smart_context_manager, collect_features, compute_loss_context_manager, evaluation_loop, floating_point_ops, get_decay_parameter_names, get_optimizer_cls_and_kwargs, init_hf_repo, log_metrics, metrics_format, num_examples, num_tokens, predict, prediction_loop, prediction_step, save_metrics, save_model, save_state, training_step
:exclude-members: autocast_smart_context_manager, collect_features, compute_loss_context_manager, evaluation_loop, floating_point_ops, get_decay_parameter_names, get_optimizer_cls_and_kwargs, init_hf_repo, log_metrics, metrics_format, num_examples, num_tokens, predict, prediction_loop, prediction_step, save_metrics, save_state, training_step
```
12 changes: 7 additions & 5 deletions docs/sentence_transformer/training_overview.md
@@ -171,8 +171,8 @@ Most loss functions can be initialized with just the :class:`SentenceTransformer
.. sidebar:: Documentation
- :class:`sentence_transformers.losses.CoSENTLoss`
- `Losses API Reference <../package_reference/sentence_transformer/losses>`_
- `Loss Overview <loss_overview>`_
- `Losses API Reference <../package_reference/sentence_transformer/losses.html>`_
- `Loss Overview <loss_overview.html>`_
::
@@ -200,7 +200,7 @@ Most loss functions can be initialized with just the :class:`SentenceTransformer
## Training Arguments

```eval_rst
The :class:`~sentence_transformers.training_args.SentenceTransformersTrainingArguments` class can be used to specify parameters for influencing training performance as well as defining the tracking/debugging parameters. Although it is optional, it is heavily recommended to experiment with the various useful arguments.
The :class:`~sentence_transformers.training_args.SentenceTransformerTrainingArguments` class can be used to specify parameters for influencing training performance as well as defining the tracking/debugging parameters. Although it is optional, it is heavily recommended to experiment with the various useful arguments.
```

The following are tables with some of the most useful training arguments.
@@ -248,7 +248,7 @@ The following are tables with some of the most useful training arguments.
<br>

```eval_rst
Here is an example of how :class:`~sentence_transformers.training_args.SentenceTransformersTrainingArguments` can be initialized:
Here is an example of how :class:`~sentence_transformers.training_args.SentenceTransformerTrainingArguments` can be initialized:
```

```python
@@ -259,6 +259,7 @@ args = SentenceTransformerTrainingArguments(
num_train_epochs=1,
per_device_train_batch_size=16,
per_device_eval_batch_size=16,
learning_rate=2e-5,
warmup_ratio=0.1,
fp16=True, # Set to False if you get an error that your GPU can't run on FP16
bf16=False, # Set to True if you have a GPU that supports BF16
@@ -426,6 +427,7 @@ The :class:`~sentence_transformers.SentenceTransformerTrainer` is where all prev
num_train_epochs=1,
per_device_train_batch_size=16,
per_device_eval_batch_size=16,
learning_rate=2e-5,
warmup_ratio=0.1,
fp16=True, # Set to False if you get an error that your GPU can't run on FP16
bf16=False, # Set to True if you have a GPU that supports BF16
@@ -499,7 +501,7 @@ The top performing models are trained using many datasets at once. Normally, thi
- Use a dictionary of :class:`~datasets.Dataset` instances (or a :class:`~datasets.DatasetDict`) as the ``train_dataset`` and ``eval_dataset``.
- (Optional) Use a dictionary of loss functions mapping dataset names to losses. Only required if you wish to use a different loss function for different datasets.
Each training/evaluation batch will only contain samples from one of the datasets. The order in which batches are sampled from the multiple datasets is defined by the :class:`~sentence_transformers.training_args.MultiDatasetBatchSamplers` enum, which can be passed to the :class:`~sentence_transformers.training_args.SentenceTransformersTrainingArguments` via ``multi_dataset_batch_sampler``. Valid options are:
Each training/evaluation batch will only contain samples from one of the datasets. The order in which batches are sampled from the multiple datasets is defined by the :class:`~sentence_transformers.training_args.MultiDatasetBatchSamplers` enum, which can be passed to the :class:`~sentence_transformers.training_args.SentenceTransformerTrainingArguments` via ``multi_dataset_batch_sampler``. Valid options are:
- ``MultiDatasetBatchSamplers.ROUND_ROBIN``: Round-robin sampling from each dataset until one is exhausted. With this strategy, it’s likely that not all samples from each dataset are used, but each dataset is sampled from equally.
- ``MultiDatasetBatchSamplers.PROPORTIONAL`` (default): Sample from each dataset in proportion to its size. With this strategy, all samples from each dataset are used and larger datasets are sampled from more frequently.
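
Put together, a minimal sketch of such a multi-dataset setup might look as follows; the base model, dataset names, and toy training examples are illustrative assumptions rather than part of this commit:

```python
from datasets import Dataset
from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer
from sentence_transformers.losses import CoSENTLoss, MultipleNegativesRankingLoss
from sentence_transformers.training_args import (
    MultiDatasetBatchSamplers,
    SentenceTransformerTrainingArguments,
)

model = SentenceTransformer("microsoft/mpnet-base")

# One entry per dataset; every batch is drawn from a single dataset.
train_dataset = {
    "qa": Dataset.from_dict({
        "anchor": ["How big is London?", "What is the capital of France?"],
        "positive": ["London has roughly 9 million inhabitants.", "Paris is the capital of France."],
    }),
    "sts": Dataset.from_dict({
        "sentence1": ["A plane is taking off.", "A man is playing a flute."],
        "sentence2": ["An air plane is taking off.", "A man is playing the guitar."],
        "score": [1.0, 0.3],
    }),
}

# Optional: a different loss per dataset, keyed by the same names.
loss = {
    "qa": MultipleNegativesRankingLoss(model),
    "sts": CoSENTLoss(model),
}

args = SentenceTransformerTrainingArguments(
    output_dir="models/multi-dataset-demo",
    multi_dataset_batch_sampler=MultiDatasetBatchSamplers.PROPORTIONAL,  # the default
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
```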
2 changes: 1 addition & 1 deletion examples/applications/embedding-quantization/README.md
@@ -68,7 +68,7 @@ Note that you can also choose `"ubinary"` to quantize to binary using the unsign

## Scalar (int8) Quantization

To convert the `float32` embeddings into `int8`, we use a process called scalar quantization. This involves mapping the continuous range of `float32` values to the discrete set of `int8` values, which can represent 256 distinct levels (from -128 to 127) as shown in the image below. This is done by using a large calibration dataset of embeddings. We compute the range of these embeddings, i.e. the `min` and `max` of each of the embedding dimensions. From there, we calculate the steps (buckets) in which we categorize each value.
To convert the `float32` embeddings into `int8`, we use a process called scalar quantization. This involves mapping the continuous range of `float32` values to the discrete set of `int8` values, which can represent 256 distinct levels (from -128 to 127). This is done by using a large calibration dataset of embeddings. We compute the range of these embeddings, i.e. the `min` and `max` of each of the embedding dimensions. From there, we calculate the steps (buckets) in which we categorize each value.

To further boost the retrieval performance, you can optionally apply the same rescoring step as for the binary embeddings. It is important to note here that the calibration dataset has a large influence on the performance, since it defines the buckets.
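
As a rough illustration of the procedure described above, here is a small NumPy sketch; the exact bucket boundaries and rounding used by the library may differ:

```python
import numpy as np

def scalar_quantize(embeddings: np.ndarray, calibration: np.ndarray) -> np.ndarray:
    # Per-dimension range, estimated from a large calibration set of float32 embeddings.
    mins = calibration.min(axis=0)
    maxs = calibration.max(axis=0)
    # Width of each of the 256 buckets per dimension (guard against zero-width ranges).
    steps = np.maximum(maxs - mins, 1e-12) / 255.0
    # Map each value to one of 256 levels, shift into [-128, 127], and cast to int8.
    levels = (embeddings - mins) / steps - 128.0
    return np.clip(np.round(levels), -128, 127).astype(np.int8)

# Toy example: 10k calibration embeddings and 4 query embeddings of dimension 384.
calibration = np.random.randn(10_000, 384).astype(np.float32)
embeddings = np.random.randn(4, 384).astype(np.float32)
int8_embeddings = scalar_quantize(embeddings, calibration)
print(int8_embeddings.dtype, int8_embeddings.shape)  # int8 (4, 384)
```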

2 changes: 1 addition & 1 deletion examples/training/sts/training_stsbenchmark.py
@@ -69,7 +69,7 @@
fp16=True, # Set to False if you get an error that your GPU can't run on FP16
bf16=False, # Set to True if you have a GPU that supports BF16
# Optional tracking/debugging parameters:
eval_strategy="steps",
evaluation_strategy="steps",
eval_steps=100,
save_strategy="steps",
save_steps=100,
6 changes: 3 additions & 3 deletions sentence_transformers/LoggingHandler.py
@@ -4,10 +4,10 @@


class LoggingHandler(logging.Handler):
def __init__(self, level=logging.NOTSET):
def __init__(self, level=logging.NOTSET) -> None:
super().__init__(level)

def emit(self, record):
def emit(self, record) -> None:
try:
msg = self.format(record)
tqdm.tqdm.write(msg)
@@ -18,7 +18,7 @@ def emit(self, record):
self.handleError(record)


def install_logger(given_logger, level=logging.WARNING, fmt="%(levelname)s:%(name)s:%(message)s"):
def install_logger(given_logger, level=logging.WARNING, fmt="%(levelname)s:%(name)s:%(message)s") -> None:
"""Configures the given logger; format, logging level, style, etc"""
import coloredlogs

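For context, this handler is typically installed by passing it to `logging.basicConfig`, as in the library's example scripts; a minimal usage sketch:

```python
import logging

from sentence_transformers import LoggingHandler

# Route log records through tqdm.write so progress bars are not broken up.
logging.basicConfig(
    format="%(asctime)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
    handlers=[LoggingHandler()],
)
logging.getLogger(__name__).info("Logging is now tqdm-friendly.")
```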
