Commit

Merge branch 'master' into v3.0-release
tomaarsen committed Jun 7, 2024
2 parents ba908be + f012ab3 commit 8a02e45
Showing 60 changed files with 585 additions and 348 deletions.
26 changes: 13 additions & 13 deletions docs/_static/html/models_en_sentence_embeddings.html
@@ -567,7 +567,7 @@
},
{
"name": "multi-qa-MiniLM-L6-dot-v1",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"base_model": '<a href="https://huggingface.co/nreimers/MiniLM-L6-H384-uncased" target="_blank">nreimers/MiniLM-L6-H384-uncased</a>',
"pooling": "CLS Pooling",
"training_data": "215M (question, answer) pairs from diverse sources.",
@@ -583,7 +583,7 @@
},
{
"name": "multi-qa-MiniLM-L6-cos-v1",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"base_model": '<a href="https://huggingface.co/nreimers/MiniLM-L6-H384-uncased" target="_blank">nreimers/MiniLM-L6-H384-uncased</a>',
"pooling": "Mean Pooling",
"training_data": "215M (question, answer) pairs from diverse sources.",
@@ -600,7 +600,7 @@
},
{
"name": "multi-qa-distilbert-dot-v1",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"base_model": '<a href="https://huggingface.co/distilbert-base" target="_blank">distilbert-base</a>',
"pooling": "CLS Pooling",
"training_data": "215M (question, answer) pairs from diverse sources.",
@@ -616,7 +616,7 @@
},
{
"name": "multi-qa-distilbert-cos-v1",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"base_model": '<a href="https://huggingface.co/distilbert-base" target="_blank">distilbert-base</a>',
"pooling": "Mean Pooling",
"training_data": "215M (question, answer) pairs from diverse sources.",
@@ -633,7 +633,7 @@
},
{
"name": "multi-qa-mpnet-base-dot-v1",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"base_model": '<a href="https://huggingface.co/microsoft/mpnet-base" target="_blank"microsoft/mpnet-base</a>',
"pooling": "CLS Pooling",
"training_data": "215M (question, answer) pairs from diverse sources.",
@@ -650,7 +650,7 @@
},
{
"name": "multi-qa-mpnet-base-cos-v1",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on a large and diverse set of (question, answer) pairs.",
"base_model": '<a href="https://huggingface.co/microsoft/mpnet-base" target="_blank">microsoft/mpnet-base</a>',
"pooling": "Mean Pooling",
"training_data": "215M (question, answer) pairs from diverse sources.",
@@ -666,7 +666,7 @@
},
{
"name": "msmarco-distilbert-dot-v5",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages. It was trained on the MS MARCO passages dataset.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on the MS MARCO passages dataset.",
"base_model": '<a href="https://huggingface.co/distilbert-base" target="_blank">distilbert-base</a>',
"pooling": "Mean Pooling",
"training_data": "500k (query, answer) pairs from MS MARCO Passages dataset.",
@@ -682,7 +682,7 @@
},
{
"name": "msmarco-bert-base-dot-v5",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages. It was trained on the MS MARCO passages dataset.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on the MS MARCO passages dataset.",
"base_model": '<a href="https://huggingface.co/Luyu/co-condenser-marco" target="_blank">Luyu/co-condenser-marco</a>',
"pooling": "Mean Pooling",
"training_data": "500k (query, answer) pairs from MS MARCO Passages dataset.",
@@ -698,7 +698,7 @@
},
{
"name": "msmarco-distilbert-base-tas-b",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages. It was trained on the MS MARCO passages dataset.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages. It was trained on the MS MARCO passages dataset.",
"base_model": '<a href="https://huggingface.co/distilbert-base" target="_blank">distilbert-base</a>',
"pooling": "Mean Pooling",
"training_data": "500k (query, answer) pairs from MS MARCO Passages dataset.",
@@ -778,7 +778,7 @@
},
{
"name": "gtr-t5-base",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages.",
"base_model": '<a href="https://huggingface.co/t5-base" target="_blank">t5-base</a>',
"pooling": "Mean Pooling",
"training_data": "2B question-answer pairs from diverse online communities and then on MS-MARCO.",
@@ -794,7 +794,7 @@
},
{
"name": "gtr-t5-large",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages.",
"base_model": '<a href="https://huggingface.co/t5-large" target="_blank">t5-large</a>',
"pooling": "Mean Pooling",
"training_data": "2B question-answer pairs from diverse online communities and then on MS-MARCO.",
@@ -810,7 +810,7 @@
},
{
"name": "gtr-t5-xl",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages.",
"base_model": '<a href="https://huggingface.co/t5-3b" target="_blank">t5-3b</a>',
"pooling": "Mean Pooling",
"training_data": "2B question-answer pairs from diverse online communities and then on MS-MARCO.",
@@ -826,7 +826,7 @@
},
{
"name": "gtr-t5-xxl",
"description": "This model was tuned for semantic search: Given a query/question, if can find relevant passages.",
"description": "This model was tuned for semantic search: Given a query/question, it can find relevant passages.",
"base_model": '<a href="https://huggingface.co/t5-11b" target="_blank">t5-11b</a>',
"pooling": "Mean Pooling",
"training_data": "2B question-answer pairs from diverse online communities and then on MS-MARCO.",
2 changes: 1 addition & 1 deletion docs/_themes/sphinx_rtd_theme/layout.html
@@ -126,7 +126,7 @@
<a href="https://twitter.com/Nils_Reimers" target="_blank" title="Follow SBERT on Twitter"><img src="/_static/Twitter_Logo_White.svg" height="20" style="margin: 0px 10px 0px -10px;"> </a>
</div> -->
<div id="hf-button">
<a href="https://huggingface.co/models?library=sentence-transformers" target="_blank" title="See all Sentence Transformer models"><img src="{{ pathto('_static/img/hf-logo.svg', 1) }}" style="margin: 0px 10px 0px -10px; padding: 0px; height: 28px; width: 28px;"></a>
<a href="https://huggingface.co/models?library=sentence-transformers" target="_blank" title="See all Sentence Transformer models"><img src="{{ pathto('_static/hf-logo.svg', 1) }}" style="margin: 0px 10px 0px -10px; padding: 0px; height: 28px; width: 28px;"></a>
</div>
<div id="github-button"></div>
</div>
2 changes: 1 addition & 1 deletion docs/package_reference/sentence_transformer/trainer.md
@@ -7,5 +7,5 @@
.. autoclass:: sentence_transformers.trainer.SentenceTransformerTrainer
:members:
:inherited-members:
:exclude-members: autocast_smart_context_manager, collect_features, compute_loss_context_manager, evaluation_loop, floating_point_ops, get_decay_parameter_names, get_optimizer_cls_and_kwargs, init_hf_repo, log_metrics, metrics_format, num_examples, num_tokens, predict, prediction_loop, prediction_step, save_metrics, save_model, save_state, training_step
:exclude-members: autocast_smart_context_manager, collect_features, compute_loss_context_manager, evaluation_loop, floating_point_ops, get_decay_parameter_names, get_optimizer_cls_and_kwargs, init_hf_repo, log_metrics, metrics_format, num_examples, num_tokens, predict, prediction_loop, prediction_step, save_metrics, save_state, training_step
```
12 changes: 7 additions & 5 deletions docs/sentence_transformer/training_overview.md
@@ -171,8 +171,8 @@ Most loss functions can be initialized with just the :class:`SentenceTransformer
.. sidebar:: Documentation
- :class:`sentence_transformers.losses.CoSENTLoss`
- `Losses API Reference <../package_reference/sentence_transformer/losses>`_
- `Loss Overview <loss_overview>`_
- `Losses API Reference <../package_reference/sentence_transformer/losses.html>`_
- `Loss Overview <loss_overview.html>`_
::
@@ -200,7 +200,7 @@ Most loss functions can be initialized with just the :class:`SentenceTransformer
## Training Arguments

```eval_rst
The :class:`~sentence_transformers.training_args.SentenceTransformersTrainingArguments` class can be used to specify parameters for influencing training performance as well as defining the tracking/debugging parameters. Although it is optional, it is heavily recommended to experiment with the various useful arguments.
The :class:`~sentence_transformers.training_args.SentenceTransformerTrainingArguments` class can be used to specify parameters for influencing training performance as well as defining the tracking/debugging parameters. Although it is optional, it is heavily recommended to experiment with the various useful arguments.
```

The following are tables with some of the most useful training arguments.
@@ -248,7 +248,7 @@ The following are tables with some of the most useful training arguments.
<br>

```eval_rst
Here is an example of how :class:`~sentence_transformers.training_args.SentenceTransformersTrainingArguments` can be initialized:
Here is an example of how :class:`~sentence_transformers.training_args.SentenceTransformerTrainingArguments` can be initialized:
```

```python
@@ -259,6 +259,7 @@ args = SentenceTransformerTrainingArguments(
num_train_epochs=1,
per_device_train_batch_size=16,
per_device_eval_batch_size=16,
learning_rate=2e-5,
warmup_ratio=0.1,
fp16=True, # Set to False if you get an error that your GPU can't run on FP16
bf16=False, # Set to True if you have a GPU that supports BF16
@@ -426,6 +427,7 @@ The :class:`~sentence_transformers.SentenceTransformerTrainer` is where all prev
num_train_epochs=1,
per_device_train_batch_size=16,
per_device_eval_batch_size=16,
learning_rate=2e-5,
warmup_ratio=0.1,
fp16=True, # Set to False if you get an error that your GPU can't run on FP16
bf16=False, # Set to True if you have a GPU that supports BF16
@@ -499,7 +501,7 @@ The top performing models are trained using many datasets at once. Normally, thi
- Use a dictionary of :class:`~datasets.Dataset` instances (or a :class:`~datasets.DatasetDict`) as the ``train_dataset`` and ``eval_dataset``.
- (Optional) Use a dictionary of loss functions mapping dataset names to losses. Only required if you wish to use a different loss function for different datasets.
Each training/evaluation batch will only contain samples from one of the datasets. The order in which batches are sampled from the multiple datasets is defined by the :class:`~sentence_transformers.training_args.MultiDatasetBatchSamplers` enum, which can be passed to the :class:`~sentence_transformers.training_args.SentenceTransformersTrainingArguments` via ``multi_dataset_batch_sampler``. Valid options are:
Each training/evaluation batch will only contain samples from one of the datasets. The order in which batches are sampled from the multiple datasets is defined by the :class:`~sentence_transformers.training_args.MultiDatasetBatchSamplers` enum, which can be passed to the :class:`~sentence_transformers.training_args.SentenceTransformerTrainingArguments` via ``multi_dataset_batch_sampler``. Valid options are:
- ``MultiDatasetBatchSamplers.ROUND_ROBIN``: Round-robin sampling from each dataset until one is exhausted. With this strategy, it’s likely that not all samples from each dataset are used, but each dataset is sampled from equally.
- ``MultiDatasetBatchSamplers.PROPORTIONAL`` (default): Sample from each dataset in proportion to its size. With this strategy, all samples from each dataset are used and larger datasets are sampled from more frequently.
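
Put together, a minimal sketch of such a multi-dataset setup might look as follows; the base model, dataset names, and toy training examples are illustrative assumptions rather than part of this commit:

```python
from datasets import Dataset
from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer
from sentence_transformers.losses import CoSENTLoss, MultipleNegativesRankingLoss
from sentence_transformers.training_args import (
    MultiDatasetBatchSamplers,
    SentenceTransformerTrainingArguments,
)

model = SentenceTransformer("microsoft/mpnet-base")

# One entry per dataset; every batch is drawn from a single dataset.
train_dataset = {
    "qa": Dataset.from_dict({
        "anchor": ["How big is London?", "What is the capital of France?"],
        "positive": ["London has roughly 9 million inhabitants.", "Paris is the capital of France."],
    }),
    "sts": Dataset.from_dict({
        "sentence1": ["A plane is taking off.", "A man is playing a flute."],
        "sentence2": ["An air plane is taking off.", "A man is playing the guitar."],
        "score": [1.0, 0.3],
    }),
}

# Optional: a different loss per dataset, keyed by the same names.
loss = {
    "qa": MultipleNegativesRankingLoss(model),
    "sts": CoSENTLoss(model),
}

args = SentenceTransformerTrainingArguments(
    output_dir="models/multi-dataset-demo",
    multi_dataset_batch_sampler=MultiDatasetBatchSamplers.PROPORTIONAL,  # the default
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
```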
2 changes: 1 addition & 1 deletion examples/applications/embedding-quantization/README.md
@@ -68,7 +68,7 @@ Note that you can also choose `"ubinary"` to quantize to binary using the unsign

## Scalar (int8) Quantization

To convert the `float32` embeddings into `int8`, we use a process called scalar quantization. This involves mapping the continuous range of `float32` values to the discrete set of `int8` values, which can represent 256 distinct levels (from -128 to 127) as shown in the image below. This is done by using a large calibration dataset of embeddings. We compute the range of these embeddings, i.e. the `min` and `max` of each of the embedding dimensions. From there, we calculate the steps (buckets) in which we categorize each value.
To convert the `float32` embeddings into `int8`, we use a process called scalar quantization. This involves mapping the continuous range of `float32` values to the discrete set of `int8` values, which can represent 256 distinct levels (from -128 to 127). This is done by using a large calibration dataset of embeddings. We compute the range of these embeddings, i.e. the `min` and `max` of each of the embedding dimensions. From there, we calculate the steps (buckets) in which we categorize each value.

To further boost the retrieval performance, you can optionally apply the same rescoring step as for the binary embeddings. It is important to note here that the calibration dataset has a large influence on the performance, since it defines the buckets.
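
As a rough illustration of the procedure described above, here is a small NumPy sketch; the exact bucket boundaries and rounding used by the library may differ:

```python
import numpy as np

def scalar_quantize(embeddings: np.ndarray, calibration: np.ndarray) -> np.ndarray:
    # Per-dimension range, estimated from a large calibration set of float32 embeddings.
    mins = calibration.min(axis=0)
    maxs = calibration.max(axis=0)
    # Width of each of the 256 buckets per dimension (guard against zero-width ranges).
    steps = np.maximum(maxs - mins, 1e-12) / 255.0
    # Map each value to one of 256 levels, shift into [-128, 127], and cast to int8.
    levels = (embeddings - mins) / steps - 128.0
    return np.clip(np.round(levels), -128, 127).astype(np.int8)

# Toy example: 10k calibration embeddings and 4 query embeddings of dimension 384.
calibration = np.random.randn(10_000, 384).astype(np.float32)
embeddings = np.random.randn(4, 384).astype(np.float32)
int8_embeddings = scalar_quantize(embeddings, calibration)
print(int8_embeddings.dtype, int8_embeddings.shape)  # int8 (4, 384)
```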

2 changes: 1 addition & 1 deletion examples/training/sts/training_stsbenchmark.py
@@ -69,7 +69,7 @@
fp16=True, # Set to False if you get an error that your GPU can't run on FP16
bf16=False, # Set to True if you have a GPU that supports BF16
# Optional tracking/debugging parameters:
eval_strategy="steps",
evaluation_strategy="steps",
eval_steps=100,
save_strategy="steps",
save_steps=100,
6 changes: 3 additions & 3 deletions sentence_transformers/LoggingHandler.py
@@ -4,10 +4,10 @@


class LoggingHandler(logging.Handler):
def __init__(self, level=logging.NOTSET):
def __init__(self, level=logging.NOTSET) -> None:
super().__init__(level)

def emit(self, record):
def emit(self, record) -> None:
try:
msg = self.format(record)
tqdm.tqdm.write(msg)
@@ -18,7 +18,7 @@ def emit(self, record):
self.handleError(record)


def install_logger(given_logger, level=logging.WARNING, fmt="%(levelname)s:%(name)s:%(message)s"):
def install_logger(given_logger, level=logging.WARNING, fmt="%(levelname)s:%(name)s:%(message)s") -> None:
"""Configures the given logger; format, logging level, style, etc"""
import coloredlogs

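For context, this handler is typically installed by passing it to `logging.basicConfig`, as in the library's example scripts; a minimal usage sketch:

```python
import logging

from sentence_transformers import LoggingHandler

# Route log records through tqdm.write so progress bars are not broken up.
logging.basicConfig(
    format="%(asctime)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
    handlers=[LoggingHandler()],
)
logging.getLogger(__name__).info("Logging is now tqdm-friendly.")
```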
