diff --git a/python/test_task/Dockerfile b/python/test_task/Dockerfile
new file mode 100644
index 0000000..ea8aa60
--- /dev/null
+++ b/python/test_task/Dockerfile
@@ -0,0 +1,14 @@
+FROM python:3.11.2
+
+RUN mkdir -p /opt/app
+
+WORKDIR /opt/app
+
+COPY requirements.txt .
+COPY main.py .
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+
+#EXPOSE 8000
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/python/test_task/main.py b/python/test_task/main.py
new file mode 100644
index 0000000..13b1342
--- /dev/null
+++ b/python/test_task/main.py
@@ -0,0 +1,67 @@
+from typing import Union
+
+from fastapi import FastAPI
+from pydantic import BaseModel
+
+import torch
+from transformers import BertForSequenceClassification, AutoTokenizer
+
+LABELS = ['neutral', 'happiness', 'sadness', 'enthusiasm', 'fear', 'anger', 'disgust']
+tokenizer = AutoTokenizer.from_pretrained('Aniemore/rubert-tiny2-russian-emotion-detection')
+model = BertForSequenceClassification.from_pretrained('Aniemore/rubert-tiny2-russian-emotion-detection')
+
+
+@torch.no_grad()
+def predict_emotion(text: str) -> str:
+    """
+    We take the input text, tokenize it, pass it through the model, and then return the predicted label
+    :param text: The text to be classified
+    :type text: str
+    :return: The predicted emotion
+    """
+    inputs = tokenizer(text, max_length=512, padding=True, truncation=True, return_tensors='pt')
+    outputs = model(**inputs)
+    predicted = torch.nn.functional.softmax(outputs.logits, dim=1)
+    predicted = torch.argmax(predicted, dim=1).numpy()
+
+    return LABELS[predicted[0]]
+
+
+@torch.no_grad()
+def predict_emotions(text: str) -> dict:
+    """
+    It takes a string of text, tokenizes it, feeds it to the model, and returns a dictionary of emotions and their
+    probabilities
+    :param text: The text you want to classify
+    :type text: str
+    :return: A dictionary of emotions and their probabilities. 
+    """
+    inputs = tokenizer(text, max_length=512, padding=True, truncation=True, return_tensors='pt')
+    outputs = model(**inputs)
+    predicted = torch.nn.functional.softmax(outputs.logits, dim=1)
+    emotions_list = {}
+    for i in range(len(predicted.numpy()[0].tolist())):
+        emotions_list[LABELS[i]] = predicted.numpy()[0].tolist()[i]
+    return emotions_list
+
+
+# simple_prediction = predict_emotion("Какой же сегодня прекрасный день, братья")
+# not_simple_prediction = predict_emotions("Какой же сегодня прекрасный день, братья")
+
+# print(simple_prediction)
+# print(not_simple_prediction)
+
+app = FastAPI()
+
+
+class Text(BaseModel):
+    text: str
+
+
+@app.post("/predict_emotions")
+def text(text: Text):
+    results = []
+    ans = predict_emotions(text.text)
+    for key, value in ans.items():
+        results.append({"label": key, "score": value})
+    return {"results": results}
diff --git a/python/test_task/readme.txt b/python/test_task/readme.txt
new file mode 100644
index 0000000..8b98ac3
--- /dev/null
+++ b/python/test_task/readme.txt
@@ -0,0 +1,6 @@
+docker build -t myimage .
+
+docker run -d --name mycontainer -p 8000:8000 myimage
+
+Документация доступна по адресу
+http://127.0.0.1:8000/docs
\ No newline at end of file
diff --git a/python/test_task/requirements.txt b/python/test_task/requirements.txt
new file mode 100644
index 0000000..3ab0770
Binary files /dev/null and b/python/test_task/requirements.txt differ