
Commit

Update dependencies
Riccorl committed Sep 9, 2021
1 parent 70616d0 commit ef68132
Showing 4 changed files with 35 additions and 13 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -280,6 +280,7 @@ dmypy.json
 .prof

 ### vscode ###
+.vscode
 .vscode/*
 !.vscode/settings.json
 !.vscode/tasks.json
Binary file added docs/pip_tre.png
9 changes: 6 additions & 3 deletions setup.py
@@ -3,13 +3,16 @@
 with open("README.md", "r") as fh:
     long_description = fh.read()

-extras = {"torch": ["torch>=1.5,<1.10"], "spacy": ["spacy>=3.0,<3.2"]}
+extras = {}
+extras["torch"] = ["torch>=1.5,<1.10"]
+extras["spacy"] = ["spacy>=3.0,<3.2"]
+extras["all"] = extras["torch"] + extras["spacy"]

-install_requires = ["transformers>=4.3<4.9"]
+install_requires = ["transformers>=4.3,<4.9"]

 setuptools.setup(
     name="transformer_embedder", # Replace with your own username
-    version="1.7.13",
+    version="1.7.14",
     author="Riccardo Orlando",
     author_email="orlandoricc@gmail.com",
     description="Word level transformer based embeddings",
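The setup.py change splits the optional dependencies into named groups and adds an `all` group that combines them, while also fixing the malformed transformers version specifier (`>=4.3<4.9` → `>=4.3,<4.9`). As a rough sketch of how these groups are typically exposed through setuptools (the `extras_require=extras` argument is assumed here, since the diff is truncated before the rest of the `setup()` call):

```python
# Illustrative sketch only: setup() arguments beyond those visible in the diff are assumed.
import setuptools

extras = {}
extras["torch"] = ["torch>=1.5,<1.10"]
extras["spacy"] = ["spacy>=3.0,<3.2"]
extras["all"] = extras["torch"] + extras["spacy"]

setuptools.setup(
    name="transformer_embedder",
    version="1.7.14",
    install_requires=["transformers>=4.3,<4.9"],
    extras_require=extras,  # enables e.g. `pip install "transformer_embedder[all]"`
)
```

With this layout, a plain install only requires transformers, while `pip install "transformer_embedder[torch]"`, `[spacy]`, or `[all]` opts into the heavier optional dependencies.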
38 changes: 28 additions & 10 deletions transformer_embedder/embedder.py
@@ -49,6 +49,16 @@ def __init__(
                 layers (`sum`), the pooled output (`pooled`).
             fine_tune (): if `True`, the transformer model is fine-tuned during training.
             return_all (): if `True`, returns all the outputs from the HuggingFace model.
+
+        Args:
+            model (str or :obj:`transformers.PreTrainedModel`): A string with the name of the model
+                or a :obj:`transformers.PreTrainedModel` object.
+            subtoken_pooling (str, optional): Method for pooling the sub-tokens. Can either be `first`, `last`, `mean`, or `sum`.
+            output_layer (str, optional): Method for pooling the word embeddings. Can either be `last`, `concat`,
+                `sum`, `pooled`, or `none`.
+            fine_tune (bool, optional): Whether to fine-tune the model.
+            return_all (bool, optional): Whether to return all outputs of the model.
+
         """
         super().__init__()
         if isinstance(model, str):
@@ -78,15 +78,21 @@ def forward(
         Forward method of the PyTorch module.

         Args:
-            input_ids (torch.LongTensor): Input ids for the transformer model
-            offsets (torch.LongTensor): Offsets of the sub-token, used to reconstruct the word embeddings
-            attention_mask (torch.BoolTensor): Attention mask for the transformer model
-            token_type_ids (torch.LongTensor): Token type ids for the transformer model
-            *args ():
-            **kwargs ():
+            input_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`):
+                Indices of input sequence tokens in the vocabulary.
+            offsets (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, optional):
+                Offsets of the sub-token, used to reconstruct the word embeddings.
+            attention_mask (:obj:`torch.BoolTensor` of shape :obj:`(batch_size, sequence_length)`, optional):
+                Mask to avoid performing attention on padding token indices.
+            token_type_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, optional):
+                Segment token indices to indicate first and second portions of the inputs.
+            args:
+                Additional positional arguments.
+            kwargs:
+                Additional keyword arguments.

         Returns:
-            the word embeddings
+            :obj:`WordsModelOutput`: The output of the model.

         """
         # Some of the huggingface models don't have the
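The rewritten forward docstring now spells out the expected tensor shapes and the `WordsModelOutput` return type. A minimal, hypothetical call site is sketched below; the class name `TransformerEmbedder`, the import path, and the `word_embeddings` attribute on the returned object are assumptions, while the argument names and shapes come from the docstring in this diff:

```python
# Hypothetical usage sketch; only the argument names and shapes are taken from the docstring.
import torch
from transformer_embedder import TransformerEmbedder  # assumed import path

embedder = TransformerEmbedder("bert-base-cased", subtoken_pooling="mean", output_layer="last")

# One sentence, six sub-tokens; offsets map each sub-token back to a word index.
input_ids = torch.tensor([[101, 7592, 1010, 2088, 999, 102]])   # (batch_size, sequence_length)
attention_mask = torch.ones_like(input_ids, dtype=torch.bool)   # (batch_size, sequence_length)
offsets = torch.tensor([[0, 1, 2, 3, 4, 5]])                    # (batch_size, sequence_length)

outputs = embedder(input_ids=input_ids, offsets=offsets, attention_mask=attention_mask)
word_embeddings = outputs.word_embeddings  # attribute name assumed for WordsModelOutput
```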
@@ -134,11 +150,13 @@ def get_word_embeddings(
         It computes the mean of the sub-tokens or taking one (first or last) as word representation.

         Args:
-            embeddings (torch.Tensor): sub-tokens embeddings
-            offsets (torch.Tensor): offsets of the sub-tokens
+            embeddings (:obj:`torch.Tensor` of shape :obj:`(batch_size, num_subtoken, embedding_size)`):
+                Sub-tokens embeddings.
+            offsets (:obj:`torch.LongTensor` of shape :obj:`(batch_size, num_subtoken)`, optional):
+                Offsets of the sub-tokens.

         Returns:
-            torch.Tensor: the word embeddings
+            :obj:`torch.Tensor`: The word embeddings.

         """
         # no offsets provided, returns the embeddings as they are.
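The get_word_embeddings docstring describes pooling sub-token embeddings back into word embeddings via an offsets tensor. The snippet below is a self-contained illustration of that idea using mean pooling; it is not the library's implementation, just a sketch under the shapes documented above:

```python
# Sketch of offsets-based mean pooling; not the library's actual code.
import torch


def mean_pool_by_offsets(embeddings: torch.Tensor, offsets: torch.Tensor) -> torch.Tensor:
    # embeddings: (batch_size, num_subtoken, embedding_size)
    # offsets:    (batch_size, num_subtoken), word index of each sub-token
    batch_size, num_subtoken, embedding_size = embeddings.shape
    num_words = int(offsets.max()) + 1
    pooled = torch.zeros(batch_size, num_words, embedding_size)
    counts = torch.zeros(batch_size, num_words, 1)
    # Scatter-add every sub-token embedding into its word slot, then average.
    pooled.scatter_add_(1, offsets.unsqueeze(-1).expand(-1, -1, embedding_size), embeddings)
    counts.scatter_add_(1, offsets.unsqueeze(-1), torch.ones(batch_size, num_subtoken, 1))
    return pooled / counts.clamp(min=1)


emb = torch.randn(1, 6, 4)                   # 6 sub-tokens, embedding size 4
off = torch.tensor([[0, 0, 1, 2, 2, 3]])     # the sub-tokens form 4 words
print(mean_pool_by_offsets(emb, off).shape)  # torch.Size([1, 4, 4])
```

Taking the first or last sub-token instead of the mean, as the docstring also allows, would simply select one sub-token embedding per word rather than averaging them.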
