diff --git a/backend/alembic/versions/3fbb82ea683d_add_segment_translation_source.py b/backend/alembic/versions/3fbb82ea683d_add_segment_translation_source.py
new file mode 100644
index 0000000..24a63a1
--- /dev/null
+++ b/backend/alembic/versions/3fbb82ea683d_add_segment_translation_source.py
@@ -0,0 +1,46 @@
+"""Add segment translation source
+
+Revision ID: 3fbb82ea683d
+Revises: 32d5a77e6615
+Create Date: 2025-12-06 13:49:58.517637
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# pylint: disable=E1101
+
+# revision identifiers, used by Alembic.
+revision: str = "3fbb82ea683d"
+down_revision: Union[str, None] = "32d5a77e6615"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+segmentsource = sa.Enum(
+    "glossary",
+    "machine_translation",
+    "translation_memory",
+    "full_match",
+    name="recordsource",
+)
+
+
+def upgrade() -> None:
+    segmentsource.create(op.get_bind(), checkfirst=True)
+    op.add_column(
+        "document_record",
+        sa.Column(
+            "target_source",
+            segmentsource,
+            nullable=True,
+        ),
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("document_record", "target_source")
+    segmentsource.drop(op.get_bind(), checkfirst=True)
diff --git a/backend/app/documents/models.py b/backend/app/documents/models.py
index b5547ba..4450ebd 100644
--- a/backend/app/documents/models.py
+++ b/backend/app/documents/models.py
@@ -97,6 +97,13 @@ class Document(Base):
     )


+class RecordSource(Enum):
+    glossary = "glossary"
+    machine_translation = "mt"
+    translation_memory = "tm"
+    full_match = "fm"  # for digits
+
+
 class DocumentRecord(Base):
     __tablename__ = "document_record"

@@ -105,6 +112,7 @@
     source: Mapped[str] = mapped_column()
     target: Mapped[str] = mapped_column()
     approved: Mapped[bool] = mapped_column(default=False)
+    target_source: Mapped[RecordSource] = mapped_column(nullable=True)

     document: Mapped["Document"] = relationship(back_populates="records")
     comments: Mapped[list["Comment"]] = relationship(
diff --git a/backend/app/documents/schema.py b/backend/app/documents/schema.py
index ff964c0..73eb8b9 100644
--- a/backend/app/documents/schema.py
+++ b/backend/app/documents/schema.py
@@ -2,10 +2,10 @@

 from pydantic import BaseModel, Field

-from app.documents.models import TmMode
+from app.documents.models import RecordSource, TmMode
 from app.glossary.schema import GlossaryResponse
 from app.models import DocumentStatus, Identified, MachineTranslationSettings
-from app.translation_memory.schema import TranslationMemory, TranslationMemoryUsage
+from app.translation_memory.schema import TranslationMemory


 class DocumentRecordFilter(BaseModel):
@@ -31,6 +31,7 @@ class DocumentRecord(Identified):
     approved: bool
     repetitions_count: int
     has_comments: bool
+    translation_src: RecordSource | None


 class DocumentRecordListResponse(BaseModel):
@@ -52,9 +53,7 @@ class DocumentRecordUpdate(BaseModel):


 class DocumentProcessingSettings(BaseModel):
-    substitute_numbers: bool
     machine_translation_settings: Optional[MachineTranslationSettings]
-    memory_usage: TranslationMemoryUsage
     similarity_threshold: float = Field(default=1.0, ge=0.0, le=1.0)


diff --git a/backend/app/routers/document.py b/backend/app/routers/document.py
index e60f476..7745e98 100644
--- a/backend/app/routers/document.py
+++ b/backend/app/routers/document.py
@@ -118,7 +118,6 @@ def get_doc_records(
     query = GenericDocsQuery(db)
     total_records = query.get_document_records_count_filtered(doc, filters)
     records = query.get_document_records_paged(doc, page, filters=filters)
-
     record_list = [
         doc_schema.DocumentRecord(
             id=record.id,
@@ -127,6 +126,9 @@
             approved=record.approved,
             repetitions_count=repetitions_count,
             has_comments=has_comments,
+            translation_src=record.target_source.value
+            if record.target_source
+            else None,
         )
         for record, repetitions_count, has_comments in records
     ]
diff --git a/backend/app/translation_memory/schema.py b/backend/app/translation_memory/schema.py
index 3ba36b7..3262387 100644
--- a/backend/app/translation_memory/schema.py
+++ b/backend/app/translation_memory/schema.py
@@ -1,5 +1,3 @@
-from enum import Enum
-
 from pydantic import BaseModel, Field

 from app.base.schema import Identified
@@ -11,11 +9,6 @@ class MemorySubstitution(BaseModel):
     similarity: float


-class TranslationMemoryUsage(Enum):
-    NEWEST = "newest"
-    OLDEST = "oldest"
-
-
 class TranslationMemory(Identified):
     name: str
     created_by: int
diff --git a/backend/tests/fixtures/small.xliff b/backend/tests/fixtures/small.xliff
index 789c792..0269f36 100644
--- a/backend/tests/fixtures/small.xliff
+++ b/backend/tests/fixtures/small.xliff
@@ -23,6 +23,10 @@
       123456789


+
+      Something else
+
+

\ No newline at end of file
diff --git a/backend/tests/routers/test_routes_doc_records.py b/backend/tests/routers/test_routes_doc_records.py
index a4f9a90..ecb7cb3 100644
--- a/backend/tests/routers/test_routes_doc_records.py
+++ b/backend/tests/routers/test_routes_doc_records.py
@@ -10,6 +10,7 @@
     Document,
     DocumentRecord,
     DocumentType,
+    RecordSource,
 )
 from app.translation_memory.models import TranslationMemory, TranslationMemoryRecord

@@ -19,7 +20,11 @@
 def test_can_get_doc_records(user_logged_client: TestClient, session: Session):
     with session as s:
         records = [
-            DocumentRecord(source="Regional Effects", target="Translation"),
+            DocumentRecord(
+                source="Regional Effects",
+                target="Translation",
+                target_source=RecordSource.translation_memory,
+            ),
             DocumentRecord(source="User Interface", target="UI", approved=True),
         ]
         s.add(
@@ -46,6 +51,7 @@ def test_can_get_doc_records(user_logged_client: TestClient, session: Session):
             "approved": False,
             "repetitions_count": 1,
             "has_comments": False,
+            "translation_src": "tm",
         },
         {
             "id": 2,
@@ -54,6 +60,7 @@ def test_can_get_doc_records(user_logged_client: TestClient, session: Session):
             "approved": True,
             "repetitions_count": 1,
             "has_comments": False,
+            "translation_src": None,
         },
     ]

@@ -94,6 +101,7 @@ def test_doc_records_returns_second_page(
         "approved": False,
         "repetitions_count": 1,
         "has_comments": False,
+        "translation_src": None,
     }
diff --git a/backend/tests/routers/test_routes_documents.py b/backend/tests/routers/test_routes_documents.py
index 9e6366f..2c51ed9 100644
--- a/backend/tests/routers/test_routes_documents.py
+++ b/backend/tests/routers/test_routes_documents.py
@@ -25,7 +25,6 @@
 from app.models import DocumentStatus
 from app.schema import DocumentTask
 from app.translation_memory.models import TranslationMemory
-from app.translation_memory.schema import TranslationMemoryUsage

 # pylint: disable=C0116

@@ -296,9 +295,7 @@ def test_process_sets_document_in_pending_stage_and_creates_task_xliff(
     response = user_logged_client.post(
         "/document/1/process",
         json={
-            "substitute_numbers": False,
             "machine_translation_settings": None,
-            "memory_usage": TranslationMemoryUsage.NEWEST.value,
         },
     )

@@ -318,9 +315,7 @@ def test_process_sets_document_in_pending_stage_and_creates_task_txt(
     response = user_logged_client.post(
         "/document/1/process",
         json={
False, "machine_translation_settings": None, - "memory_usage": TranslationMemoryUsage.NEWEST.value, }, ) @@ -340,9 +335,7 @@ def test_process_creates_task_for_xliff( response = user_logged_client.post( "/document/1/process", json={ - "substitute_numbers": False, "machine_translation_settings": None, - "memory_usage": TranslationMemoryUsage.NEWEST.value, }, ) @@ -356,9 +349,7 @@ def test_process_creates_task_for_xliff( "type": "xliff", "document_id": 1, "settings": { - "substitute_numbers": False, "machine_translation_settings": None, - "memory_usage": "newest", "similarity_threshold": 1.0, }, } @@ -371,9 +362,7 @@ def test_process_creates_task_for_txt(user_logged_client: TestClient, session: S response = user_logged_client.post( "/document/1/process", json={ - "substitute_numbers": False, "machine_translation_settings": None, - "memory_usage": TranslationMemoryUsage.NEWEST.value, }, ) @@ -387,9 +376,7 @@ def test_process_creates_task_for_txt(user_logged_client: TestClient, session: S "type": "txt", "document_id": 1, "settings": { - "substitute_numbers": False, "machine_translation_settings": None, - "memory_usage": "newest", "similarity_threshold": 1.0, }, } @@ -401,9 +388,7 @@ def test_returns_404_when_processing_nonexistent_doc( response = user_logged_client.post( "/document/1/process", json={ - "substitute_numbers": False, "machine_translation_settings": None, - "memory_usage": TranslationMemoryUsage.NEWEST.value, }, ) assert response.status_code == 404 diff --git a/backend/tests/test_worker.py b/backend/tests/test_worker.py index 60d7e86..f42ff53 100644 --- a/backend/tests/test_worker.py +++ b/backend/tests/test_worker.py @@ -11,6 +11,7 @@ DocMemoryAssociation, Document, DocumentType, + RecordSource, TxtDocument, TxtRecord, XliffDocument, @@ -27,7 +28,6 @@ ) from app.schema import DocumentTask from app.translation_memory.models import TranslationMemory, TranslationMemoryRecord -from app.translation_memory.schema import TranslationMemoryUsage from worker import process_task # pylint: disable=C0116 @@ -54,8 +54,6 @@ def create_xliff_doc(data: str): def create_task( *, type_: Literal["xliff", "txt"] = "xliff", - usage: TranslationMemoryUsage = TranslationMemoryUsage.NEWEST, - substitute_numbers: bool = False, mt_settings: YandexTranslatorSettings | None = None, ): return DocumentTask( @@ -63,9 +61,7 @@ def create_task( type=type_, document_id=1, settings=DocumentProcessingSettings( - substitute_numbers=substitute_numbers, machine_translation_settings=mt_settings, - memory_usage=usage, ), ).model_dump_json(), status="pending", @@ -79,6 +75,18 @@ def test_process_task_sets_xliff_records(session: Session): with session as s: s.add_all( [ + Glossary( + name="test_glossary", + created_by=1, + records=[ + GlossaryRecord( + source="Something else", + target="Глоссарный перевод", + created_by=1, + stemmed_source="something else", + ) + ], + ), TranslationMemory( name="test", records=[ @@ -92,6 +100,7 @@ def test_process_task_sets_xliff_records(session: Session): create_doc(name="small.xliff", type_=DocumentType.xliff), create_xliff_doc(file_data), DocMemoryAssociation(doc_id=1, tm_id=1, mode="read"), + DocGlossaryAssociation(document_id=1, glossary_id=1), ] ) @@ -104,7 +113,7 @@ def test_process_task_sets_xliff_records(session: Session): doc = s.query(Document).filter_by(id=1).one() assert doc.processing_status == "done" - assert len(doc.records) == 4 + assert len(doc.records) == 5 assert all(record.document_id == 1 for record in doc.records) assert all(record.id == idx + 1 for idx, record in 
     assert all(record.id == idx + 1 for idx, record in enumerate(doc.records))

@@ -113,6 +122,7 @@
     record = doc.records[0]
     assert record.source == "Regional Effects"
     assert record.target == "Translation"
+    assert record.target_source == RecordSource.translation_memory
     assert not record.approved
     xliff_record = (
         s.query(XliffRecord).filter(XliffRecord.parent_id == record.id).one()
     )
@@ -124,6 +134,7 @@
     record = doc.records[1]
     assert record.source == "Other Effects"
     assert record.target == ""
+    assert record.target_source is None
     assert not record.approved
     xliff_record = (
         s.query(XliffRecord).filter(XliffRecord.parent_id == record.id).one()
     )
@@ -135,6 +146,7 @@
     record = doc.records[2]
     assert record.source == "Regional Effects"
     assert record.target == "Региональные эффекты"
+    assert record.target_source is None
     assert record.approved
     xliff_record = (
         s.query(XliffRecord).filter(XliffRecord.parent_id == record.id).one()
     )
@@ -142,16 +154,29 @@
     assert xliff_record.segment_id == 675608
     assert xliff_record.state == "translated"

-    # It does not substitute numbers
+    # It does substitute numbers
     record = doc.records[3]
     assert record.source == "123456789"
-    assert record.target == ""
-    assert not record.approved
+    assert record.target == "123456789"
+    assert record.target_source == RecordSource.full_match
+    assert record.approved
     xliff_record = (
         s.query(XliffRecord).filter(XliffRecord.parent_id == record.id).one()
     )
     assert xliff_record.segment_id == 675609
-    assert xliff_record.state == "needs-translation"
+    assert xliff_record.state == "translated"
+
+    # It does substitute glossary records
+    record = doc.records[4]
+    assert record.source == "Something else"
+    assert record.target == "Глоссарный перевод"
+    assert record.target_source == RecordSource.glossary
+    assert record.approved
+    xliff_record = (
+        s.query(XliffRecord).filter(XliffRecord.parent_id == record.id).one()
+    )
+    assert xliff_record.segment_id == 675610
+    assert xliff_record.state == "translated"


 def test_process_task_sets_txt_records(session: Session):
@@ -236,6 +261,7 @@ def test_process_task_sets_txt_records(session: Session):
     record = doc.records[4]
     assert record.source == "The sloth is named Razak."
assert record.target == "Translation" + assert record.target_source == RecordSource.translation_memory assert ( s.query(TxtRecord).filter_by(parent_id=record.id).one().offset == 310 if crlf @@ -289,97 +315,21 @@ def test_process_task_uses_correct_tm_ids(session: Session): assert doc.records[0].target == "Another translation" -@pytest.mark.parametrize( - ["mode", "trans_result"], - [("newest", "Another translation"), ("oldest", "Translation")], -) -def test_process_task_uses_tm_mode(mode: str, trans_result: str, session: Session): - with open("tests/fixtures/small.xliff", "r", encoding="utf-8") as fp: - file_data = fp.read() - - with session as s: - tm_records_1 = [ - TranslationMemoryRecord( - source="Regional Effects", - target="Translation", - creation_date=datetime(2020, 1, 1, 0, 0, 0), - change_date=datetime(2020, 1, 1, 0, 0, 0), - ) - ] - tm_records_2 = [ - TranslationMemoryRecord( - source="Regional Effects", - target="Another translation", - creation_date=datetime(2021, 1, 1, 0, 0, 0), - change_date=datetime(2021, 1, 1, 0, 0, 0), - ) - ] - s.add_all( - [ - TranslationMemory(name="test1", records=tm_records_1, created_by=1), - TranslationMemory(name="test2", records=tm_records_2, created_by=1), - create_doc(name="small.xliff", type_=DocumentType.xliff), - create_xliff_doc(file_data), - create_task(usage=TranslationMemoryUsage(mode)), - DocMemoryAssociation(doc_id=1, tm_id=1, mode="read"), - DocMemoryAssociation(doc_id=1, tm_id=2, mode="read"), - ] - ) - s.commit() - - result = process_task(s, s.query(DocumentTask).one()) - assert result - - doc = s.query(Document).filter_by(id=1).one() - assert doc.processing_status == "done" - assert len(doc.records) > 1 - assert doc.records[0].target == trans_result - - -def test_process_task_substitutes_numbers(session: Session): - with open("tests/fixtures/small.xliff", "r", encoding="utf-8") as fp: - file_data = fp.read() - - with session as s: - s.add_all( - [ - TranslationMemory(name="test", records=[], created_by=1), - create_doc(name="small.xliff", type_=DocumentType.xliff), - create_xliff_doc(file_data), - create_task(substitute_numbers=True), - ] - ) - s.commit() - - result = process_task(s, s.query(DocumentTask).one()) - assert result - - doc = s.query(Document).filter_by(id=1).one() - assert doc.processing_status == "done" - assert len(doc.records) == 4 - assert doc.records[3].source == "123456789" - assert doc.records[3].target == "123456789" - - @pytest.mark.parametrize( "task_data", [ { "document_id": 1, "settings": { - "substitute_numbers": False, "use_machine_translation": False, "machine_translation_settings": None, - "memory_usage": "newest", }, }, { "type": "xliff", "settings": { - "substitute_numbers": False, "use_machine_translation": False, "machine_translation_settings": None, - "memory_usage": "newest", }, }, { @@ -390,10 +340,8 @@ def test_process_task_substitutes_numbers(session: Session): "type": "broken", "document_id": 1, "settings": { - "substitute_numbers": False, "use_machine_translation": False, "machine_translation_settings": None, - "memory_usage": "newest", }, }, ], diff --git a/backend/worker.py b/backend/worker.py index 465cf6c..a47a2df 100644 --- a/backend/worker.py +++ b/backend/worker.py @@ -1,10 +1,10 @@ -# This is a worker that takes tasks from the database every 10 seconds and +# This is a worker that takes tasks from the database every N seconds and # processes files in it. # Tasks are stored in document_task table and encoded in JSON. 

 import logging
 import time
-from typing import Iterable, Sequence
+from typing import Iterable, Literal, Sequence, overload

 from sqlalchemy import select
 from sqlalchemy.orm import Session
@@ -14,12 +14,12 @@
     Document,
     DocumentRecord,
     DocumentType,
+    RecordSource,
     TxtRecord,
     XliffRecord,
 )
 from app.documents.query import GenericDocsQuery
 from app.documents.schema import DocumentProcessingSettings, DocumentTaskDescription
-from app.formats.base import BaseSegment
 from app.formats.txt import TxtSegment, extract_txt_content
 from app.formats.xliff import XliffSegment, extract_xliff_content
 from app.glossary.models import GlossaryRecord
@@ -28,29 +28,76 @@
 from app.schema import DocumentTask
 from app.translation_memory.models import TranslationMemoryRecord
 from app.translation_memory.query import TranslationMemoryQuery
-from app.translation_memory.schema import TranslationMemoryUsage
 from app.translators import llm, yandex
 from app.translators.common import LineWithGlossaries

+type FormatSegment = XliffSegment | TxtSegment
+
+
+class WorkerSegment:
+    @overload
+    def __init__(
+        self, *, type_: Literal["xliff"], original_segment: XliffSegment
+    ) -> None: ...
+
+    @overload
+    def __init__(
+        self, *, type_: Literal["txt"], original_segment: TxtSegment
+    ) -> None: ...
+
+    def __init__(
+        self,
+        *,
+        type_: Literal["xliff", "txt"],
+        original_segment: FormatSegment,
+    ) -> None:
+        self._segment_src = None
+        self._type = type_
+        self._approved = False
+        self.original_segment = original_segment
+        assert (type_ == "xliff" and isinstance(original_segment, XliffSegment)) or (
+            type_ == "txt" and isinstance(original_segment, TxtSegment)
+        )
+        if isinstance(original_segment, XliffSegment):
+            self._approved = original_segment.approved
+
+    @property
+    def segment_source(self) -> RecordSource | None:
+        return self._segment_src
+
+    @segment_source.setter
+    def segment_source(self, value: RecordSource | None):
+        self._segment_src = value
+
+    @property
+    def approved(self):
+        return self._approved

-def segment_needs_processing(segment: BaseSegment) -> bool:
-    if isinstance(segment, XliffSegment):
-        return not segment.approved
-    return True
+    @approved.setter
+    def approved(self, value: bool):
+        self._approved = value
+
+    @property
+    def type_(self):
+        return self._type
+
+    @property
+    def needs_processing(self) -> bool:
+        if isinstance(self.original_segment, XliffSegment):
+            return not self.original_segment.approved
+        return True


 def get_segment_translation(
     source: str,
     threshold: float,
     tm_ids: list[int],
-    tm_usage: TranslationMemoryUsage,
-    substitute_numbers: bool,
     glossary_ids: list[int],
     session: Session,
-) -> str | None:
+) -> tuple[str, RecordSource | None] | None:
     # TODO: this would be nice to have batching for all segments to reduce amounts of requests to DB
-    if substitute_numbers and source.isdigit():
-        return source
+    if source.isdigit():
+        return source, RecordSource.full_match

     glossary_record = (
         session.query(GlossaryRecord)
@@ -61,31 +108,23 @@
         .first()
     )
     if glossary_record:
-        return glossary_record.target
+        return glossary_record.target, RecordSource.glossary

     if threshold < 1.0:
         substitutions = TranslationMemoryQuery(session).get_substitutions(
             source, tm_ids, threshold, 1
         )
         if substitutions:
-            return substitutions[0].target
+            return substitutions[0].target, RecordSource.translation_memory
     else:
         selector = (
             select(TranslationMemoryRecord.source, TranslationMemoryRecord.target)
             .where(TranslationMemoryRecord.source == source)
             .where(TranslationMemoryRecord.document_id.in_(tm_ids))
+            .order_by(TranslationMemoryRecord.change_date.desc())
         )
-        match tm_usage:
-            case TranslationMemoryUsage.NEWEST:
-                selector = selector.order_by(TranslationMemoryRecord.change_date.desc())
-            case TranslationMemoryUsage.OLDEST:
-                selector = selector.order_by(TranslationMemoryRecord.change_date.asc())
-            case _:
-                logging.error("Unknown translation memory usage option")
-                return None
-
         tm_data = session.execute(selector.limit(1)).first()
-        return tm_data.target if tm_data else None
+        return (tm_data.target, RecordSource.translation_memory) if tm_data else None

     return None
@@ -149,15 +188,21 @@ def process_document(
     return mt_result


-def extract_segments(doc: Document) -> Sequence[BaseSegment]:
+def extract_segments(doc: Document) -> Sequence[WorkerSegment]:
     if doc.type == DocumentType.xliff:
         xliff_document = doc.xliff
         xliff_data = extract_xliff_content(xliff_document.original_document.encode())
-        return xliff_data.segments
+        return [
+            WorkerSegment(type_="xliff", original_segment=segment)
+            for segment in xliff_data.segments
+        ]
     if doc.type == DocumentType.txt:
         txt_document = doc.txt
         txt_data = extract_txt_content(txt_document.original_document)
-        return txt_data.segments
+        return [
+            WorkerSegment(type_="txt", original_segment=segment)
+            for segment in txt_data.segments
+        ]

     logging.error("Unknown document type")
     return []
@@ -166,7 +211,7 @@ def extract_segments(doc: Document) -> Sequence[WorkerSegment]:
 def substitute_segments(
     settings: DocumentProcessingSettings,
     session: Session,
-    segments: Iterable[BaseSegment],
+    segments: Iterable[WorkerSegment],
     tm_ids: list[int],
     glossary_ids: list[int],
 ) -> list[int]:
@@ -176,15 +221,13 @@ def substitute_segments(
     """
     to_translate: list[int] = []
     for idx, segment in enumerate(segments):
-        if not segment_needs_processing(segment):
+        if not segment.needs_processing:
             continue

         translation = get_segment_translation(
-            segment.original,
+            segment.original_segment.original,
             settings.similarity_threshold,
             tm_ids,
-            settings.memory_usage,
-            settings.substitute_numbers,
             glossary_ids,
             session,
         )
@@ -192,12 +235,16 @@ def substitute_segments(
             to_translate.append(idx)
             continue
-        segment.translation = translation or ""
+        target_translation, segment_src = translation
+        segment.original_segment.translation = target_translation or ""
+        segment.segment_source = segment_src
+        if segment_src in (RecordSource.full_match, RecordSource.glossary):
+            segment.approved = True

     return to_translate


 def translate_segments(
-    segments: Sequence[BaseSegment],
+    segments: Sequence[WorkerSegment],
     translate_indices: Sequence[int],
     glossary_ids: list[int],
     mt_settings: MachineTranslationSettings,
@@ -210,7 +257,9 @@ def translate_segments(
     try:
         # TODO: this might be harmful with LLM translation as it is loses
         # the connectivity of the context
-        segments_to_translate = [segments[idx].original for idx in translate_indices]
+        segments_to_translate = [
+            segments[idx].original_segment.original for idx in translate_indices
+        ]
         data_to_translate: list[LineWithGlossaries] = []
         for segment in segments_to_translate:
             glossary_records = GlossaryQuery(session).get_glossary_records_for_segment(
@@ -233,7 +282,9 @@ def translate_segments(
             logging.fatal("Unknown translation API")
             raise RuntimeError("Unknown translation API")
         for idx, translated_line in enumerate(translated):
-            segments[translate_indices[idx]].translation = translated_line
+            segments[
+                translate_indices[idx]
+            ].original_segment.translation = translated_line
     # TODO: handle specific exceptions instead of a generic one
     except Exception as e:
         logging.error("Machine translation error %s", e)
@@ -245,13 +296,15 @@ def translate_segments(
 def create_doc_segments(
     doc: Document,
     session: Session,
-    segments: Iterable[BaseSegment],
+    segments: Iterable[WorkerSegment],
 ) -> None:
     doc_records = [
         DocumentRecord(
             document_id=doc.id,
-            source=segment.original,
-            target=segment.translation or "",
+            source=segment.original_segment.original,
+            target=segment.original_segment.translation or "",
+            approved=segment.approved,
+            target_source=segment.segment_source,
         )
         for segment in segments
     ]
@@ -263,27 +316,30 @@ def create_doc_segments(
     if doc.type == DocumentType.xliff:
         xliff_records: Sequence[XliffRecord] = []
         for idx, segment in enumerate(segments):
-            assert isinstance(segment, XliffSegment)
+            original = segment.original_segment
+            assert isinstance(original, XliffSegment)
+            if segment.approved:
+                original.approved = True
             xliff_records.append(
                 XliffRecord(
                     parent_id=doc_records[idx].id,
                     document_id=doc.xliff.id,
-                    segment_id=segment.id_,
-                    state=segment.state.value,
+                    segment_id=original.id_,
+                    state=original.state.value,
                 )
             )
-            doc_records[idx].approved = segment.approved
         session.add_all(xliff_records)
         session.commit()
     elif doc.type == DocumentType.txt:
         txt_records: Sequence[TxtRecord] = []
         for idx, segment in enumerate(segments):
-            assert isinstance(segment, TxtSegment)
+            original = segment.original_segment
+            assert isinstance(original, TxtSegment)
             txt_records.append(
                 TxtRecord(
                     parent_id=doc_records[idx].id,
                     document_id=doc.txt.id,
-                    offset=segment.offset,
+                    offset=original.offset,
                 )
             )
         session.add_all(txt_records)
diff --git a/frontend/mocks/documentMocks.ts b/frontend/mocks/documentMocks.ts
index 16aa97d..c549200 100644
--- a/frontend/mocks/documentMocks.ts
+++ b/frontend/mocks/documentMocks.ts
@@ -187,6 +187,7 @@ const segments: DocumentRecord[] = [
     target: 'Зацепки приключения',
     repetitions_count: 2,
     has_comments: false,
+    translation_src: 'glossary',
   },
   {
     id: 10001,
@@ -197,6 +198,7 @@ const segments: DocumentRecord[] = [
       'В тот момент, когда кинидийцы извлекли рог из монолита, их город был обречен.',
     repetitions_count: 1,
     has_comments: true,
+    translation_src: 'tm',
   },
   {
     id: 10002,
@@ -205,6 +207,16 @@ const segments: DocumentRecord[] = [
     target: 'Зацепки приключения',
     repetitions_count: 2,
     has_comments: true,
+    translation_src: 'mt',
+  },
+  {
+    id: 10003,
+    approved: true,
+    source: 'Adventure Hooks',
+    target: 'Зацепки приключения',
+    repetitions_count: 3,
+    has_comments: false,
+    translation_src: null,
   },
 ]
diff --git a/frontend/src/client/schemas/DocumentProcessingSettings.ts b/frontend/src/client/schemas/DocumentProcessingSettings.ts
index b71c73f..f343ba5 100644
--- a/frontend/src/client/schemas/DocumentProcessingSettings.ts
+++ b/frontend/src/client/schemas/DocumentProcessingSettings.ts
@@ -2,11 +2,8 @@

 import {LlmTranslatorSettings} from './LlmTranslatorSettings'
 import {YandexTranslatorSettings} from './YandexTranslatorSettings'
-import {TranslationMemoryUsage} from './TranslationMemoryUsage'

 export interface DocumentProcessingSettings {
-  substitute_numbers: boolean
   machine_translation_settings: LlmTranslatorSettings | YandexTranslatorSettings | null
-  memory_usage: TranslationMemoryUsage
   similarity_threshold?: number
 }
diff --git a/frontend/src/client/schemas/DocumentRecord.ts b/frontend/src/client/schemas/DocumentRecord.ts
index 1a545fa..39ff479 100644
--- a/frontend/src/client/schemas/DocumentRecord.ts
+++ b/frontend/src/client/schemas/DocumentRecord.ts
@@ -1,5 +1,7 @@
 // This file is autogenerated, do not edit directly.

+import {RecordSource} from './RecordSource'
+
 export interface DocumentRecord {
   id: number
   source: string
@@ -7,4 +9,5 @@ export interface DocumentRecord {
   approved: boolean
   repetitions_count: number
   has_comments: boolean
+  translation_src: RecordSource | null
 }
diff --git a/frontend/src/client/schemas/RecordSource.ts b/frontend/src/client/schemas/RecordSource.ts
new file mode 100644
index 0000000..35fe8d6
--- /dev/null
+++ b/frontend/src/client/schemas/RecordSource.ts
@@ -0,0 +1,3 @@
+// This file is autogenerated, do not edit directly.
+
+export type RecordSource = 'glossary' | 'mt' | 'tm' | 'fm'
diff --git a/frontend/src/client/schemas/TranslationMemoryUsage.ts b/frontend/src/client/schemas/TranslationMemoryUsage.ts
deleted file mode 100644
index fdadb5c..0000000
--- a/frontend/src/client/schemas/TranslationMemoryUsage.ts
+++ /dev/null
@@ -1,3 +0,0 @@
-// This file is autogenerated, do not edit directly.
-
-export type TranslationMemoryUsage = 'newest' | 'oldest'
diff --git a/frontend/src/components/DocSegment.vue b/frontend/src/components/DocSegment.vue
index 959314d..50fb7a6 100644
--- a/frontend/src/components/DocSegment.vue
+++ b/frontend/src/components/DocSegment.vue
@@ -4,6 +4,7 @@ import {computed, ref, useTemplateRef, watch} from 'vue'
 import Button from 'primevue/button'

 import {cleanableDebounce} from '../utilities/utils'
+import {RecordSource} from '../client/schemas/RecordSource'

 const props = defineProps<{
   id: number
@@ -15,6 +16,7 @@ const props = defineProps<{
   approved?: boolean
   repetitionsCount?: number
   hasComments?: boolean
+  recordSrc?: RecordSource
 }>()

 const emit = defineEmits<{
@@ -74,6 +76,36 @@ const icon = computed(
 const showCommentsDialog = () => {
   emit('addComment')
 }
+
+const segSourceTitle = computed(() => {
+  switch (props.recordSrc) {
+    case 'glossary':
+      return 'Glossary term'
+    case 'mt':
+      return 'Machine translation'
+    case 'tm':
+      return 'Translation memory'
+    case 'fm':
+      return 'Exact match'
+    default:
+      return undefined
+  }
+})
+
+const segSourceIcon = computed(() => {
+  switch (props.recordSrc) {
+    case 'glossary':
+      return 'pi-globe'
+    case 'mt':
+      return 'pi-language'
+    case 'tm':
+      return 'pi-database'
+    case 'fm':
+      return 'pi-equals'
+    default:
+      return undefined
+  }
+})