diff --git a/.gitignore b/.gitignore index 5b13342ff2..1cf7bbe649 100644 --- a/.gitignore +++ b/.gitignore @@ -82,6 +82,10 @@ src/resources/dictionaries/*.txt deploy/scripts/semantic_domains/json/*.json database/semantic_domains/* +# Intermediate and output files for tutorial video subtitling +*.srt +*.mp4 + # Combine installer installer/*.run installer/makeself-* diff --git a/Backend/Helper/GrammaticalCategory.cs b/Backend/Helper/GrammaticalCategory.cs index e51c692af6..40c36ace99 100644 --- a/Backend/Helper/GrammaticalCategory.cs +++ b/Backend/Helper/GrammaticalCategory.cs @@ -50,7 +50,7 @@ public bool Matches(string gramCat) } } - // The following patterns cover all grammatical categories in Fieldworks for: + // The following patterns cover all grammatical categories in FieldWorks for: // English (en), Spanish (es), French (fr), Portuguese (pt), Russian (ru), Chinese (zh) // Omissions due to conflicting abbreviations: // Spanish "indf" for Indefinite Pronoun (conflicts with abbrev. for Indefinite article) diff --git a/README.md b/README.md index 92a14bc9f7..9b93d65325 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,7 @@ A rapid word collection tool. See the [User Guide](https://sillsdev.github.io/Th 7. [Add or Update Dictionary Files](#add-or-update-dictionary-files) 8. [Cleanup Local Repository](#cleanup-local-repository) 9. [Generate Installer Script for The Combine](#generate-installer-script-for-the-combine-linux-only) + 10. [Generate Tutorial Video Subtitles](#generate-tutorial-video-subtitles) 3. [Setup Local Kubernetes Cluster](#setup-local-kubernetes-cluster) 1. [Install Rancher Desktop](#install-rancher-desktop) 2. 
[Install Docker Desktop](#install-docker-desktop) @@ -544,6 +545,16 @@ To update the PDF copy of the installer README.md file, run the following from t pandoc --pdf-engine=weasyprint README.md -o README.pdf ``` +## Generate Tutorial Video Subtitles + +Tutorial video transcripts are housed in `docs/tutorial_subtitles`, together with timestamps aligning transcripts with +the corresponding videos and any transcript translations downloaded from Crowdin. To generate subtitle files (and +optionally attach them to a video file), run from within a Python virtual environment: + +```bash +python scripts/subtitle_tutorial_video.py -s [-i -o ` subfolder holds one `times.txt` and at least one `..txt` where `` is the 3-character +code for the language of the transcript (`eng` as well as any other languages into which the transcripts has been +translated). All these files should have the same number of lines: + +- `.eng.txt`: each line is one sentence; +- `times.txt`: each line has the ending time of the corresponding English sentence in the tutorial video (format: `m:s`, + where `s` can have up to 3 digits after the decimal); +- `..txt` for `` other than `eng`: each line has the translation for the corresponding English sentence + (and if one sentence was translated into multiple sentences, the translation should still be just one line). + +To generate the subtitles and attach them to the video, use `scripts/subtitle_tutorial_video.py`. + +DON'T EDIT THE `.eng.txt` TRANSCRIPT IN A FOLDER WITH A `times.txt` FILE! It matches an existing video. If changes are +needed to the transcript for an updated video, put a copy into the `_in_progress_transcripts_` folder and edit it there. 
diff --git a/docs/tutorial_subtitles/_in_progress_transcripts_/data_entry_1_basics.eng.txt b/docs/tutorial_subtitles/_in_progress_transcripts_/data_entry_1_basics.eng.txt new file mode 100644 index 0000000000..5ef9b279e7 --- /dev/null +++ b/docs/tutorial_subtitles/_in_progress_transcripts_/data_entry_1_basics.eng.txt @@ -0,0 +1,43 @@ +The Combine is designed for Rapid Word Collection, a method of gathering words by semantic domain. +In this video, we will see how to do Data Entry in The Combine to collect words. +Let’s go to thecombine.app and log in. +When you click on a project, the semantic domain tree appears. +Selecting your domain is the first step in Data Entry. +If you are doing a different project task (for example, in data cleanup or project settings), you can get back here by clicking the “Data Entry” button in the top bar. +There is another tutorial video about navigating the semantic domain tree or changing its language. +For this video, let’s select domain “2: Person” and start gathering words! +There are 4 things that can be included in a new word. +First, the vernacular form of the word in the project’s vernacular language. +Second, a gloss for the word in the project’s primary analysis language. +(You can change a project’s analysis language in the project settings.) +Third, a note about the word. +Fourth, audio recordings of the word’s pronunciation. +After adding the content of the new word, press the Enter key. +See how the word we just entered appears in the table? +If we hover our cursor over the note icon for that word, the text of the note appears. +Let’s add another word! +Now the vernacular form is required for a new entry, but the gloss, note, and audio are optional. +At any time, you can make changes to the words you have entered. +Let’s add a gloss and a note to the second entry. +Let’s change the vernacular form and delete the note on the first entry. +What can we do with the audio recordings? 
+If we hover our cursor over the play button (the green triangle icon), text appears describing what options are available. +Click on the play button to listen. +Hold the shift-key and click to delete it. +A dialog box will appear to confirm whether you want to delete the recording. +(If you are using a touch-screen, you can tap on the play button to play, or press and hold the button to bring up a menu.) +When you are done entering words, click the Exit button to return to the semantic domain tree. +(Don’t worry—the words you entered are already saved even if you close the window without clicking the exit button.) +If we select the same domain to enter more words, see how the words we previously entered in this domain are listed in a panel on the side? +If you are working in a narrow web browser window, the panel of previously entered words will not automatically appear. +You can bring it up by pressing the sideways carat icon at the bottom of the data entry box. +Let’s enter more words! +If you want to delete one of the words you added, click on the delete icon at the end of its row. +Warning: this will permanently remove the word and all its content! +If you want to start over on a word you are adding, click on the delete icon here on the bottom row. +It will reset the vernacular field, the gloss field, the note, and the audio recordings for the new word. +That covers how to gather words with Data Entry in The Combine! +When gathering words by semantic domain, often the same word will be added to multiple domains, resulting in lots of duplicate words. +The Combine has ways to help avoid or manage that issue. +In the next video, we will look at entering multiple words with the same vernacular form. +Have a wonderful day! 
diff --git a/docs/tutorial_subtitles/_in_progress_transcripts_/export_2_combine_to_flex.eng.txt b/docs/tutorial_subtitles/_in_progress_transcripts_/export_2_combine_to_flex.eng.txt new file mode 100644 index 0000000000..ac61eeed4a --- /dev/null +++ b/docs/tutorial_subtitles/_in_progress_transcripts_/export_2_combine_to_flex.eng.txt @@ -0,0 +1,24 @@ +Let’s see how to move lexical data from a project in The Combine to a project in FieldWorks. +To begin, log in at thecombine.app and open your project. +Go to project settings by clicking on the gear icon in the top bar. +Click the “Import/Export” tab. +In the “Export Project” section, click the “Export” button. +While your export is loading, the “Export” button will be disabled and have a spinning green circle. +There is also a loading icon with circling arrows in the top bar to indicate that the export is in progress. +If your project has hundreds of audio recordings, it may take a few minutes to prepare the export. +You can navigate to other pages in The Combine without interrupting the export, but do not close the window or log out! +When the export completes, it will be automatically downloaded as a ZIP file to your web browser’s default Downloads location. +To see where the export was downloaded, click on the Downloads icon in the browser, move your cursor to the most recent download, and click on the “Show in Folder” folder icon. +Now we can import that downloaded ZIP file into FieldWorks. +Open FieldWorks Language Explorer and open the project you want to import your data into. +If you are creating a new FLEx project, specify the same vernacular language as your project in The Combine. +When the project is open, click on the “File” menu, move your cursor to “Import…” near the bottom, and click on “Lexicon from The Combine…”. +In the “Import/Merge from The Combine” dialog that appears, click the “Browse…” button. +Another dialog appears with a file explorer. 
+Navigate to the downloads folder where the exported ZIP file is located. Select the ZIP file and click the “Open” button. +Back at the “Import/Merge” dialog, click the “OK” button. +When FieldWorks finishes the import, a summary page opens in your web browser. +Congratulations, you’ve imported your data from The Combine into FieldWorks! +If you need to update the vernacular or analysis writing systems of your FLEx project, the options are available at the bottom of the “Format” menu. +I hope this video helps you use your data from The Combine. +Have a wonderful day! diff --git a/docs/tutorial_subtitles/_in_progress_transcripts_/merge_dups_2_senses.eng.txt b/docs/tutorial_subtitles/_in_progress_transcripts_/merge_dups_2_senses.eng.txt new file mode 100644 index 0000000000..5983c6d9bb --- /dev/null +++ b/docs/tutorial_subtitles/_in_progress_transcripts_/merge_dups_2_senses.eng.txt @@ -0,0 +1,39 @@ +This is the second video on the Merge Duplicates tool. +In the previous video, we looked at the difference between the “Save & Continue” button and the “Defer” button, and we introduced adding a flag to words. +This video gets to the heart of the merge tool: moving, deleting, and combining senses! +Our first set of potential duplicates is a pair of words with vernacular form “fly”. +The first word has 3 senses: “move through the air” and “go through the air” are definitely the same word. +However, the third sense with gloss “winged insect” is a different concept. It’s a better fit with the second word, which has gloss “housefly”. +To move a sense from one word to another, click on the sense and drag it to the other word. +Voila! Now the correct senses are together. +Wait a second… the two glosses of the first word are not different senses. They are the exact same idea expressed redundantly. +To delete the unnecessary sense, click-and-drag it over the delete icon in the bottom corner. +When the tile turns red, release and it disappears. 
+When you’re satisfied with your changes, click “Save & Continue”. +In the next set of potential duplicates, we have two words with vernacular form “fine”. +Now the senses we see in the first word are in fact two different senses of the same word, so we can leave it alone. +In the second word, we see one sense with gloss “abcdefg” and other with “fee; monetary penalty”. +These two aren’t related to the first word or to each other. Let’s create a new word with the final sense. +To create a new word, click-and-drag a sense into the empty column, and release. +Voila! Now we have three words. +Wait a second… the sense “abcdefg” in the second word is nonsense. +To delete it, click-and-drag it to the delete icon in the bottom corner. +When you delete the only sense in a word, the whole word is deleted. +See how the column disappeared and we are back to two words. +Great! Click “Save & Continue” to save that work. +In this third set of potential duplicates are two words with vernacular form “toe”. +The sense of the first word has gloss “leg digit” and semantic domain 2.1.3.2. +The sense of the second word has gloss “foot digit” and semantic domain 2.1.3.3. +These are the same sense of the same word. +To combine them, click-and-drag one sense over the other. When both senses are green, release. +Voila! The senses are combined into a single sense with both semantic domains. +Note that a sidebar opened up to show the senses that are being combined. +You can close and open the sidebar by clicking the sideways caret icon. +Now the gloss is “leg digit; foot digit”. +When senses are combined, all the semantic domains are preserved and all glosses of the same language are combined. +To change which gloss comes first, click-and-drag the tiles within the sidebar to reorder them. +If you decide to keep both glosses as separate senses, click-and-drag a sense out of the sidebar and back to the word column. +Now click the “Save & Continue” button to save this merge. 
+I hope this video helps you to clean up the data you’ve collected in The Combine! +If you want to use the Merge tool on data that was exported from FieldWorks and imported into The Combine, please check out the third Merge Duplicates video. +Have a wonderful day! diff --git a/docs/tutorial_subtitles/_in_progress_transcripts_/merge_dups_3_imported.eng.txt b/docs/tutorial_subtitles/_in_progress_transcripts_/merge_dups_3_imported.eng.txt new file mode 100644 index 0000000000..b7aad8ecee --- /dev/null +++ b/docs/tutorial_subtitles/_in_progress_transcripts_/merge_dups_3_imported.eng.txt @@ -0,0 +1,27 @@ +In the first two videos about the Merge Duplicates tool, we covered all the basics. +If you are cleaning up data gathered with The Combine, the first two videos are all you need. +This video covers using the Merge Duplicates tool with data that was imported into The Combine. +Lexical data from FieldWorks can have information that is not supported in The Combine. +However, The Combine is designed to prevent accidental deletion of that information. +In this first set of potential duplicates, note that the top bar is yellow. +That indicates that these three imported entries have information that’s not visible in The Combine. +Such information could include (for example) annotations, etymologies, or variants. +Removing the final sense of a word in the Merge Duplicates tool results in that word being deleted. +Therefore, a lone sense on a protected word cannot be moved. +If we look at this second word, it has two senses. +It is a protected word, but we can move one of the senses without deleting the word. +So let’s move the sense with gloss “correct” to be a second sense of the first word. +Now that the second word only has one sense, that sense cannot be moved. +The third word is a duplicate of the first, but it cannot be deleted. So instead, we can add a flag. +Now click the “Save & Continue” button to save our work. 
+In this next set of potential duplicates, the tops of the words aren’t yellow but one of the senses is yellow—it is protected. +A sense of an imported word can be protected if it has sense-specific information that isn’t supported in The Combine. +Such information could include (for example) illustrations, reversals, or subsenses. +Protected senses can be moved. +However, protected senses cannot be deleted. +A protected sense also cannot be dropped into another sense. +If you want to merge two senses and one of them is protected, click-and-drag the other sense and drop it into the protected sense. +Merged senses can generally be reordered in the sidebar, but if the top sense is protected, you cannot move another sense above it. +Now click the “Save & Continue” button to save your work. +I hope this video helps you clean up lexical data imported into The Combine. +Have a wonderful day! diff --git a/docs/tutorial_subtitles/_in_progress_transcripts_/review_entries_1_reviewing.eng.txt b/docs/tutorial_subtitles/_in_progress_transcripts_/review_entries_1_reviewing.eng.txt new file mode 100644 index 0000000000..0f3833cf7d --- /dev/null +++ b/docs/tutorial_subtitles/_in_progress_transcripts_/review_entries_1_reviewing.eng.txt @@ -0,0 +1,50 @@ +Let’s see how to review all the words in your project on The Combine. +Click the “Data Cleanup” button in the top bar, then select “Review Entries”. +This tool allows you to filter and sort all your word entries. +You can also edit them here, but we look at that in another video. +Each row in the table has one entry with all its content. +The first column has the vernacular form of the entries. +Another column has the number of senses in the entry. +Another column has the entry’s glosses in the project analysis languages. +Note that the glosses from different senses are separated by a vertical line. +Another column has the semantic domains from all of the entries senses, sorted numerically. 
+Another column has the pronunciation audio recordings. +You can click on a green triangle to play the audio. +If the audio recording has a speaker selected, hover your cursor over the green triangle to see the speaker name. +Another column is for any note attached to the entry. +And another column shows if the entry was flagged (which is usually done in the Merge Duplicates tool). +Hover your cursor over the flag icon to see any text that was included in the flag. +Note that flags are only used within The Combine and will not export with your data. +There are two other columns—for definitions and for part of speech—that will only be available if the project has imported data with that information. +Other lexical info on imported data—including reversals, annotations, and morph types—are not viewable within The Combine. +But don’t worry, that information won’t be lost when you move your data back to FieldWorks! +If you want to change the order of the columns or hide any columns, click on the icon in the top corner with three vertical bars. +Click on the toggle next to a column name to hide or show that column. +Click-and-drag the two horizontal lines next to a toggle to change the order of the columns. +Note that the Vernacular column cannot be hidden or moved. It will always be visible as the first column. +And there are three buttons at the top of this menu, one to hide all columns, one to reset the order of the columns, and one to show all columns. +In the bottom corner, you can change the number of rows to show per page—the options are 10 entries per page, 100 entries per page, or all entries on a single page. +There are also buttons to go to the next page, the last page, the previous page, or the first page. +At the top of each column are several controls. +Click on the arrow icon to sort by that column. +You can sort by vernacular, alphabetically or reverse alphabetically. +You can sort by the number of senses, increasing or decreasing. 
+You can sort by the gloss text alphabetically or reverse alphabetically. +You can sort by lowest semantic domain number, increasing or decreasing. +You can sort by number of audio recordings, increasing or decreasing. +You can sort by note text, alphabetically or reverse alphabetically. +You can sort by whether or not the entry is flagged, and the flagged entries are sorted by the text of the flag. +The funnel icon at the top of each column can be used to add a filter. +In the vernacular, glosses, note, and flag columns, type text into that column’s filter and only entries containing the typed text in that column will be shown. +You can type a number into the filter of the number of senses column and entries with exactly that many senses will be shown. +Likewise, a number filter in the pronunciations column will only show entries with exactly that many audio recordings. +If you type a speaker name in the filter for the pronunciations column, then you can see all words with an audio recording by that speaker. +The filter on the semantic domains column uses domain ids. +Type “1.2” to show all entries that have a sense in domain 1.2. +To include a domain and all its subdomains, add a period at the end of your filter. +For example, the filter “2.5.” shows entries in domain 2.5 as well as domain 2.5.2, domain 2.5.1.1, domain 2.5.2, etc. +Finally, in the Domains, Pronunciations, and Flag column, you can type a space for the filter to show all entries that have something, anything in that column. +You can only sort by one column at a time, but you can have an active filter in as many columns as you want. +I hope this video helps you review your lexical data in The Combine. +In another video, we will talk about editing entries in this Review Entries tool. +Have a wonderful day! 
diff --git a/docs/tutorial_subtitles/_in_progress_transcripts_/review_entries_2_editing.eng.txt b/docs/tutorial_subtitles/_in_progress_transcripts_/review_entries_2_editing.eng.txt new file mode 100644 index 0000000000..71ccbe1e45 --- /dev/null +++ b/docs/tutorial_subtitles/_in_progress_transcripts_/review_entries_2_editing.eng.txt @@ -0,0 +1,74 @@ +Let’s see how to edit words in your project on The Combine. +Click the “Data Cleanup” button in the top bar, then select “Review Entries”. +This table has all the word entries in your project. +In another video, we looked at how to sort and filter this table. +In this video, we look at how to edit an entry. +Each word in your project shows up as a row in this table. +To delete a word, click the delete icon at the right end. +A pop-up will ask you to confirm whether you want to delete the word. +Within the main body of this table, the only way to edit a word is to add or remove pronunciations. +Pronunciation audio recordings are in this column with red circle and green square icons. +To record a new pronunciation for a word, click-and-hold the red circle in the word’s row. +Say the word out loud once or twice, then release the button. +A new green triangle should appear in that row. +Click the green triangle to listen to the recording. +Hover your cursor over the green triangle to see the options for editing the recording. +To delete a recording, hold the Shift key and click the green triangle. +A pop-up will ask you to confirm whether you want to delete the audio recording. +To change the speaker on a recording, right-click the green triangle. +We’ll discuss pronunciation Speakers in another video. +If you are using a touch-screen, press-and-hold on the green triangle and a menu will appear with the options. +So far, we’ve seen how to delete a word entry and how to edit its pronunciation audio. +To edit the other parts of a word, click the pencil icon on the far left. 
+A dialog will open giving you all the details of that word and the ability to edit everything. +First is the Vernacular section. +Click in the text field to edit the vernacular form of the word. +Note that the background of the section changes from gray to yellow when the content of the section has been changed. +In the top corner of this dialog are two icons. +Click the green checkmark to save changes or the red X to cancel all changes. +If there are any changes when you click the red X, a pop-up will ask you to confirm whether to discard those changes. +Click the “Cancel” button to keep editing, or click the “Confirm” button to discard the edits and return to the table. +After the Vernacular section is the Senses section. +Let’s skip that for the moment and come back to it later in this video. +Below the Senses section is the Pronunciations section. +Here you can add, play, and delete audio recordings exactly the same way as in the table, which we discussed earlier in the video. +Below the Pronunciations section is the Note section. +Click in the text field to update the text of the note. +Click on the dropdown menu on the side if you want to change which analysis language is associated with the note. +The project’s analysis languages can be changed in the project settings, as discussed in another video. +Below the Note section is the Flag section. +Click on the flag icon to toggle between flagged (indicated by a red flag) and unflagged (indicated by a gray flag). +You can also click in the text field to add or edit text for the flag. +Note that if you add text here, the word is automatically flagged; the gray flag icon changes to red. +Or if you have a flag with text and click the flag icon to unflag the word, then the text disappears. +Let us now return to the Senses section. +Within this section there is a row for every sense. +To minimize this section and only show a summary of all the senses, click on the double diagonal arrows in the corner. 
+Click again to restore the full detailed view. +On the start of each sense row is a set of buttons. +The first column has vertical arrows; click on one of them to change the order of the senses. +The second column has a delete icon and a pencil icon. +Click on the delete icon to delete the sense. +Note that the sense card changes from light gray to dark gray, indicating that the sense will be deleted when edits to this word are saved. +Click on the delete icon a second time to restore the sense. +To edit a sense, click on the pencil icon. +A second dialog will appear that lets you edit all the sense’s information. +Here we see two sections: Glosses and Semantic Domains. +In the Glosses section, you must have at least one gloss for the sense. +If you delete all the glosses, The Combine will not let you save your changes. +In the Semantic Domains section, each domain tag has a little gray X button. +Click the gray X to remove that semantic domain tag. +To add the sense to a new semantic domain, click on the + button. +The semantic domain tree pops up for you to select a domain. +We discuss the various ways to navigate this semantic domain tree in another video. +If you don’t want to select a new semantic domain, click the X icon in the top corner to close the tree. +If the project has imported data and any sense in that data has definitions or a part of speech, there will be sections with that information here in this dialog. +Note that the Semantic Domains section has a yellow background, since we deleted one of the domain tags. +Just like the word editing dialog, this sense editing dialog has two icons in the top corner: a green checkmark to confirm changes and a red X to discard changes. +If there are any changes in this sense when you click the red X, a pop-up will ask you to confirm whether to discard those changes. 
+If there are any changes in this sense when you click the green checkmark, the sense editing dialog closes and now the card for that sense has a yellow background. +The changes to that sense are not yet permanent. They will be saved or discarded with all other word changes when you finish with the word editing dialog. +In the bottom row of the Senses section, there is a + icon; click this to add a new sense. +A sense editing dialog pops up for you to fill in the details of the new sense. +I hope this video helps you clean up your lexical data in The Combine. +Have a wonderful day! diff --git a/docs/tutorial_subtitles/_in_progress_transcripts_/sem_dom_1_basics.eng.txt b/docs/tutorial_subtitles/_in_progress_transcripts_/sem_dom_1_basics.eng.txt new file mode 100644 index 0000000000..609d4f37f5 --- /dev/null +++ b/docs/tutorial_subtitles/_in_progress_transcripts_/sem_dom_1_basics.eng.txt @@ -0,0 +1,42 @@ +The Combine is designed for Rapid Word Collection, a method of gathering words by semantic domain. +In this video, we will see how to select a semantic domain in The Combine to begin collecting words in that domain. +Then at the end, we’ll see how to change the language of the semantic domains. +Let’s go to thecombine.app and log in. +When you click on a project, the semantic domain tree appears. +If you are doing a different project task (for example, in data cleanup or project settings), you can get back to the semantic domain tree by clicking the “Data Entry” button in the top bar. +Now the easiest way to get to a domain is using the domain’s id number. +If I want to collect words for domain 6.5.1.1, “House”, I can click in the field that says “Find a domain” and type 6.5.1.1 +In fact, I don’t even need to type the periods. If I type 6511, the periods are automatically added. +Now press enter, and the domain appears. +Yes, this is the domain we want! +Click on the domain tile. +The semantic domain tree disappears and the interface for data entry appears. 
+We’ll discuss this data entry tool in another video. +When you are done collecting words in this domain, click the “Exit” button to return to the semantic domain tree. +Instead of typing a domain number, you can also navigate the tree by clicking on tiles. +For example, I can click on the “6.5.1.2 Types Of Houses” tile that is beside the current domain to move to that sibling domain. +Or I can click the “6.5.1 Building” tile above to move to the parent domain. +You can also use the left and right arrow keys to navigate between siblings, the up arrow to go to the parent domain, or the down arrow to go to the first child domain. +To return to the top of the domain tree, click on the double-caret icon in the top corner. +So now you know how to select a semantic domain for data collection. +You can type the domain id number or you can navigate the tree by clicking the domain tiles or using your arrow keys. +What if you need the semantic domains to be in a language other than English? +By default, the semantic domain language will be the same as the user-interface language. +The user-interface language will automatically match your browser language, if that language is available for The Combine. +To manually choose your user-interface language, click on your username or the user avatar in the top bar. +This opens the user menu. Click on “User Settings”. +Under “User-interface language”, there is a dropdown menu for you to select your preferred language. +After you’ve made your selection, click the “Save” button. +Now my user interface is in Spanish! +I can get back to the semantic domain tree by clicking the “Data Entry”, or rather the “Entrada de Datos” button in the top bar. +Success! +The domain names are all in Spanish. +In fact, there are more languages available for the semantic domains than for the user interface. +Let me switch back to English to continue this tutorial. 
+(Note that if you use the browser language to change the user interface language, you may have to refresh the page for the change to take effect.) +Now that we’re back to English, let’s click on the gear icon in the top bar to open the project settings. +Here in the “Languages” tab, there is a drop-down menu under “Semantic Domains”. +If a project administrator selects one of these languages, such as Hindi, that will override the semantic domain language for all project users. +Now let’s go back to “Data Entry”; see how the semantic domains are now in Hindi, even though the user interface is still in English. +I hope this video helps you get your word collection started. +Have a wonderful day! diff --git a/docs/tutorial_subtitles/export_1_flex_to_combine/export_1_flex_to_combine.eng.txt b/docs/tutorial_subtitles/export_1_flex_to_combine/export_1_flex_to_combine.eng.txt new file mode 100644 index 0000000000..8fc17b6f67 --- /dev/null +++ b/docs/tutorial_subtitles/export_1_flex_to_combine/export_1_flex_to_combine.eng.txt @@ -0,0 +1,44 @@ +Let’s see how to move lexical data from a project in FieldWorks to a project in The Combine. +To begin, open your project in FieldWorks. +Here I’m using an example project with words from the Naskapi language. +With the desired FLEx project open, click on the “File” menu, then select “Export…” near the bottom of the menu. +In the “Export” dialog that appears, click on the “Full Lexicon” “LIFT 0.13 XML” option, then click the “Export…” button. +Another dialog appears for you to select where the exported files will be saved. +You’ll need to create a new folder for the files. +I’m going to Desktop and clicking the “Make New Folder” button to create a Naskapi folder. +Select the new folder you just created and click the “OK” button. +In the File Explorer, go to the folder that contains this new folder. +Right click on the folder you just created for the export and select “Compress to ZIP file”. +See the ZIP file that was created? 
+This is what we are going to import into The Combine. +Now we open a web browser and go to thecombine.app. +Once we are logged in, we see two sections: “Select Project” and “Create Project”. +Under “Select Project” we can open a previously created project. +It IS possible to import the lexical data into an existing project, but we will look at that later. +Under “Create Project”, let’s create a new project using the export from FLEx. +First I’ll type a name for the project, in my case: Naskapi. +Notice there are fields below where we can specify the Vernacular Language and an Analysis Language for the project. +This is not necessary when we are importing data because project languages will automatically be gathered from the data. +To upload existing data, click the “Browse” button. +This brings up a file explorer dialog for you to select the LIFT data that was exported from FLEx. +I’m navigating to Desktop, where I exported my data, selecting the “Naskapi.zip” file, and clicking the “Open” button. +See that The Combine has the text “File selected: Naskapi.zip”. +Great! +Under “Vernacular Language” there is now a drop-down menu. +Use it to select which of the languages in the data is to be the Vernacular Language. +The Combine only supports data entry for one vernacular language per project. +The vernacular language cannot be changed after the project has been created. +If you need to gather or organize lexical data for a different language, simply create another project. +Note that you can no longer specify an analysis language. +This is because all analysis languages present in the ZIP file are automatically added, and you can add and remove analysis languages in the project at any time. +All that’s left is to click the “Create Project” button. +Once the project is created, you are taken to the project settings page. +To access this in the future, you can click on the gear icon in the top bar. 
+Note that here in the “Languages” tab, we can see the Vernacular Language as well as review and change the Analysis Languages. +Let’s click on the “Import/Export” tab. +This is where you can import lexical data into an existing project. +That option is disabled now because we have already imported data into this project. +Only one import is allowed for a project in The Combine. +This is also where we can export data from The Combine to import into FieldWorks, but that is a topic for another video. +I hope this video helps you get started with The Combine. +Have a wonderful day! diff --git a/docs/tutorial_subtitles/export_1_flex_to_combine/times.txt b/docs/tutorial_subtitles/export_1_flex_to_combine/times.txt new file mode 100644 index 0000000000..81ed983d49 --- /dev/null +++ b/docs/tutorial_subtitles/export_1_flex_to_combine/times.txt @@ -0,0 +1,44 @@ +0:8 +0:14 +0:19 +0:30 +0:44 +0:50 +0:54 +1:11 +1:17 +1:25 +1:35 +1:39 +1:44 +1:56 +2:3 +2:9 +2:17 +2:24 +2:32 +2:41 +2:48.5 +2:54 +3:1.5 +3:16.5 +3:24 +3:25.5 +3:29 +3:38 +3:44 +3:49 +3:56.5 +4:2 +4:13.5 +4:20.5 +4:26.5 +4:33 +4:44 +4:47.5 +4:52.5 +4:59.5 +5:4 +5:13 +5:16 +5:19 diff --git a/docs/tutorial_subtitles/merge_dups_1_basics/merge_dups_1_basics.eng.txt b/docs/tutorial_subtitles/merge_dups_1_basics/merge_dups_1_basics.eng.txt new file mode 100644 index 0000000000..2d35da3694 --- /dev/null +++ b/docs/tutorial_subtitles/merge_dups_1_basics/merge_dups_1_basics.eng.txt @@ -0,0 +1,42 @@ +When you collect words by semantic domain, sometimes the same word is entered multiple times. +The Combine has a tool for finding duplicate entries and merging them together into a single entry. +Let’s log in at thecombine.app and select a project that has duplicate entries. +Click the “Data Cleanup” button in the top bar, then select “Merge Duplicates”. +The Combine will find sets of words with identical vernacular form and present them to you one set at a time. 
+In this project, the first set of potential duplicates is a pair of words with vernacular form “hard”. +One has gloss “difficult” and the other has gloss “not soft”. +Are those two different senses of the same word or two different words? +If we aren’t sure, or don’t want to make that decision now, we can press the “Defer” button at the bottom. +That moves us to the next set and prevents the deferred set from coming up again here in the “Merge Duplicates” tool. +There’s another tool for considering sets that have been deferred. +We’ll look at that later in this video. +This second set of potential duplicates has two words with vernacular form “bank”. +One has gloss “side of river” and the other has gloss “financial institution”. +These are definitely different words—homographs that we do not want to merge. +Click the “Save & Continue” button at the bottom to confirm that they are distinct words. +This pair will not show up again in the “Merge Duplicates”, nor in the list of deferred sets. +Now we’ve seen what the “Save & Continue” and “Defer” buttons do. +One important note: if a user later changes any of the words in a set of potential duplicates, then that set can show up again in the “Merge Duplicates” tool. +For example, if you click “Save & Continue” for a set of words that are not duplicates, and then later add a semantic domain to a sense of one of the words, that set may come up again as potential duplicates. +Or if you click “Defer” for a set, and then another user adds an audio recording for one of the words, then that set will show up again in “Merge Duplicates”, rather than among the deferred sets. +Let’s move on. +The Combine asks if we want to keep merging. +If we click “No”, it will take us back to the “Data Cleanup” page. +If we click “Yes”, it will look for more sets of potential duplicates. +After The Combine can’t find any more words with identical vernacular form, it will suggest words with similar vernacular forms. 
+This will help catch duplicates where one of the words has a typo or is using alternate spelling. +Here we see two words with vernacular form “present”, but the second one has two “t”s at the end. +Hmm, that must be a typo. +A gift is spelled “p-r-e-s-e-n-t” not “p-r-e-s-e-n-t-t”. +You cannot edit the text in the “Merge Duplicates” tool. +However, you can click on the flag icon to add a flag and mention what needs to be fixed. +Note that for the flag to be saved, you must click “Save & Continue”. +We can leave the merging at any time by clicking on the “Data Cleanup” button, but then unsaved changes will be discarded. +Back in “Data Cleanup”, there’s a “Review Entries” tool. +That’s where you can review and edit all words in the project, and fix any words that were flagged while merging duplicates. +We’ll cover the “Review Entries” tool in another video. +Note that there is another option here that wasn’t here before: “Review Deferred Duplicates”. +Click on it to review sets of potential duplicates that were previously deferred. +Well, that’s all for this first tutorial video on the “Merge Duplicates” tool. +In the next video, we look at moving, deleting, and combining senses. +Have a wonderful day! 
#!/usr/bin/env python3
"""
Add subtitles to a tutorial video.

Reads `times.txt` and every `<name>.<lang>.txt` transcript from the chosen subfolder of
docs/tutorial_subtitles and writes one SubRip (.srt) file per transcript language.
If video path is not provided, still generates .srt files.
If video path is provided, requires ffmpeg to be installed.
"""

import argparse
import logging
from pathlib import Path
import re
from subprocess import run
import sys
from typing import Iterable, List, Optional, Sequence

combine_dir = Path(__file__).resolve().parent.parent
subtitles_dir = combine_dir / "docs" / "tutorial_subtitles"

# One line of times.txt: "m:s", where s may carry a decimal fraction (e.g. "3:44.75").
_TIME_PATTERN = re.compile(r"(\d+):(\d{1,2})(?:\.(\d*))?")


def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Define and parse the command line arguments.

    Args:
        argv: Argument strings to parse. When None, argparse reads `sys.argv[1:]`.

    Returns:
        Namespace with `subtitles` (str), `verbose` (bool), and `input`/`output`, which are
        converted to `Path` when given.

    Raises:
        SystemExit: via `parser.error` (usage message, status 2) when only one of
            -i/--input and -o/--output is given.
    """
    parser = argparse.ArgumentParser(
        description="Add subtitles to a tutorial video.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--input",
        "-i",
        help="Path of tutorial video that needs subtitles added.",
    )
    parser.add_argument(
        "--output",
        "-o",
        help="Desired path of tutorial video with subtitles added.",
    )
    parser.add_argument(
        "--subtitles",
        "-s",
        help="Name of the docs/tutorial_subtitles subfolder for this video",
        required=True,
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Print intermediate values to aid in debugging.",
    )
    args = parser.parse_args(argv)

    # The two video paths only make sense as a pair. parser.error() prints the usage text
    # and exits cleanly, whereas raising ArgumentError here would surface as a traceback.
    if args.input and not args.output:
        parser.error("argument --output/-o: missing (required since -i/--input was given)")
    if args.output and not args.input:
        parser.error("argument --input/-i: missing (required since -o/--output was given)")

    if args.input:
        args.input = Path(args.input)
    if args.output:
        args.output = Path(args.output)

    return args


def srt_time_ranges(time_lines: Iterable[str]) -> List[str]:
    """Convert subtitle end times into SubRip time-range lines.

    Each non-blank input line is the end time of one subtitle, written as `m:s` or `m:s.f`.
    A subtitle starts where the previous one ended; the first starts at zero.

    Args:
        time_lines: Lines of a times.txt file (surrounding whitespace and blank lines ignored).

    Returns:
        One `HH:MM:SS,mmm --> HH:MM:SS,mmm` string per non-blank input line.

    Raises:
        ValueError: if a non-blank line is not in the `m:s[.f]` format.
    """
    ranges: List[str] = []
    start = "00:00:00,000"
    for line_number, raw_line in enumerate(time_lines, start=1):
        line = raw_line.strip()
        if not line:
            continue
        match = _TIME_PATTERN.fullmatch(line)
        if match is None:
            raise ValueError(
                f"line {line_number}: expected 'm:s' or 'm:s.fff' but found {line!r}"
            )
        # SubRip wants fixed-width fields, so roll minutes over into hours and zero-pad.
        hours, minutes = divmod(int(match.group(1)), 60)
        seconds = int(match.group(2))
        # The fraction is a decimal part: pad on the right and keep millisecond precision.
        millis = (match.group(3) or "")[:3].ljust(3, "0")
        end = f"{hours:02}:{minutes:02}:{seconds:02},{millis}"
        ranges.append(f"{start} --> {end}")
        start = end
    return ranges


def build_ffmpeg_command(
    input_path: Path, output_path: Path, srt_paths: Sequence[Path], langs: Sequence[str]
) -> List[str]:
    """Assemble the ffmpeg argument list that muxes subtitle tracks into a video.

    Args:
        input_path: Video to read.
        output_path: Video to write.
        srt_paths: Subtitle files, one per language.
        langs: Language code for each entry of `srt_paths`, in the same order.

    Returns:
        Argument list suitable for `subprocess.run`, with every entry a string.
    """
    inputs: List[str] = []
    maps: List[str] = ["-map", "0"]  # keep every stream of the original video
    metadata: List[str] = []
    for index, (srt_path, lang) in enumerate(zip(srt_paths, langs)):
        inputs.extend(["-i", str(srt_path)])
        # Input 0 is the video, so the subtitle inputs are numbered from 1.
        maps.extend(["-map", str(index + 1)])
        metadata.extend([f"-metadata:s:s:{index}", f"language={lang}"])
    return (
        ["ffmpeg", "-i", str(input_path)]
        + inputs
        + maps
        + ["-c", "copy", "-c:s", "mov_text"]
        + metadata
        + [str(output_path)]
    )


def create_srt_file(script_path: Path, out_path: Path, times_strings: List[str]) -> None:
    """Write a SubRip (.srt) file pairing each transcript line with its time range.

    Args:
        script_path: Transcript file; blank lines are ignored.
        out_path: Destination .srt file (overwritten if it exists).
        times_strings: One `start --> end` string per transcript line.

    Raises:
        SystemExit: (status 1, after logging an error) if the number of non-blank transcript
            lines differs from the number of time ranges.
    """
    with open(script_path, "r", encoding="utf-8") as in_file:
        sentences = [line.strip() for line in in_file if line.strip()]
    if len(sentences) != len(times_strings):
        logging.error(
            f"Transcript {script_path} should have {len(times_strings)} lines "
            f"but has {len(sentences)}"
        )
        sys.exit(1)
    with open(out_path, "w", encoding="utf-8") as out_file:
        out_file.writelines(
            f"{number}\n{time_range}\n{sentence}\n\n"
            for number, (time_range, sentence) in enumerate(
                zip(times_strings, sentences), start=1
            )
        )


def main() -> None:
    """Generate the .srt files and, if video paths were given, attach them with ffmpeg."""
    args = parse_args()
    logging.basicConfig(
        format="%(levelname)s:%(message)s",
        level=logging.INFO if args.verbose else logging.WARNING,
    )

    if args.input and not args.input.is_file():
        logging.error("Input video doesn't exist")
        sys.exit(1)

    subtitles_path: Path = subtitles_dir / args.subtitles
    if not subtitles_path.is_dir():
        logging.error("Subtitles subfolder of docs/tutorial_subtitles/ doesn't exist")
        sys.exit(1)

    times_path = subtitles_path / "times.txt"
    if not times_path.is_file():
        logging.error("Subtitles subfolder is missing the required 'times.txt' file")
        sys.exit(1)

    logging.info(f"Extracting timestamps from {times_path}")
    try:
        with open(times_path, "r", encoding="utf-8") as file:
            times_strings = srt_time_ranges(file)
    except ValueError as err:
        logging.error(f"Invalid timestamp in {times_path}: {err}")
        sys.exit(1)

    logging.info("Generating subtitle .srt files")
    prefix = f"{args.subtitles}."
    langs: List[str] = []
    srt_paths: List[Path] = []
    # glob() yields files in arbitrary order; sort so the subtitle tracks are reproducible.
    for script_path in sorted(subtitles_path.glob(f"{args.subtitles}.*.txt")):
        # Strip the known prefix rather than trusting suffixes, so a folder name that
        # itself contains a dot still yields the right language code.
        lang = script_path.name[len(prefix) :].split(".")[0]
        logging.info(f"Generating subtitles file for language: {lang}")
        out_path = subtitles_path / f"{args.subtitles}.{lang}.srt"
        create_srt_file(script_path, out_path, times_strings)
        langs.append(lang)
        srt_paths.append(out_path)
    if not langs:
        logging.error(f"No transcripts matching '{args.subtitles}.<lang>.txt' were found")
        sys.exit(1)
    logging.info(f"Languages found: {', '.join(langs)}")

    if not (args.input and args.output):
        logging.info("No -i and -o file paths provided, so ffmpeg was not run.")
        return

    logging.info(f"Attaching subtitles to {args.input}")
    exec_command = build_ffmpeg_command(args.input, args.output, srt_paths, langs)
    logging.info(f"Executing: {exec_command}")
    try:
        completed_run = run(exec_command)
    except FileNotFoundError:
        logging.error("ffmpeg was not found; install it and make sure it is on the PATH")
        sys.exit(1)
    if completed_run.returncode != 0:
        logging.error("Execution failed.")
        # Propagate the failure so callers and shell scripts can detect it.
        sys.exit(completed_run.returncode)
    logging.info(f"Video with subtitles saved to {args.output}")


if __name__ == "__main__":
    main()