diff --git a/.github/workflows/lint_and_test.yml b/.github/workflows/lint_and_test.yml index 0129ce4..c9bb009 100644 --- a/.github/workflows/lint_and_test.yml +++ b/.github/workflows/lint_and_test.yml @@ -5,9 +5,9 @@ name: Python package on: push: - branches: [ "main" ] + branches: ["main"] pull_request: - branches: [ "main" ] + branches: ["main"] jobs: linter: @@ -18,29 +18,28 @@ jobs: python-version: ["3.8"] steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - - name: Install dependencies - run: | - sudo apt-get install libsndfile1 - python -m pip install --upgrade pip - pip config set global.extra-index-url https://test.pypi.org/simple/ - pip install -r requirements.txt - pip install -r requirements-dev.txt - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: isort - run: isort --check --diff . - - name: black - run: black --check --diff . + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + - name: Install dependencies + run: | + sudo apt-get install libsndfile1 + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install -r requirements-dev.txt + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: isort + run: isort --check --diff . + - name: black + run: black --check --diff . mypy: runs-on: ubuntu-latest @@ -49,44 +48,42 @@ jobs: matrix: python-version: ["3.8"] steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - - name: Install dependencies - run: | - sudo apt-get install libsndfile1 - python -m pip install --upgrade pip - pip config set global.extra-index-url https://test.pypi.org/simple/ - pip install -r requirements.txt - pip install -r requirements-dev.txt - - name: mypy - run: mypy --install-types --non-interactive ./ --cache-dir=.mypy_cache/ + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + - name: Install dependencies + run: | + sudo apt-get install libsndfile1 + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install -r requirements-dev.txt + - name: mypy + run: mypy --install-types --non-interactive ./ --cache-dir=.mypy_cache/ unit_test: runs-on: ubuntu-latest + timeout-minutes: 20 strategy: fail-fast: false matrix: python-version: ["3.8"] steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - - name: Install dependencies - run: | - sudo apt-get install libsndfile1 - python -m pip install --upgrade pip - pip config set global.extra-index-url https://test.pypi.org/simple/ - pip install -r requirements.txt - pip install -r requirements-dev.txt - - name: pytest_unit - run: pytest tests/unit_tests/ + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + 
python-version: ${{ matrix.python-version }} + cache: "pip" + - name: Install dependencies + run: | + sudo apt-get install libsndfile1 + pip install -r requirements.txt + pip install -r requirements-dev.txt + - name: pytest_unit + run: pytest -s -v tests/unit_tests/ integration_test: runs-on: ubuntu-latest @@ -96,18 +93,31 @@ jobs: python-version: ["3.8"] timeout-minutes: 40 steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - - name: Install dependencies - run: | - sudo apt-get install libsndfile1 - python -m pip install --upgrade pip - pip config set global.extra-index-url https://test.pypi.org/simple/ - pip install -r requirements.txt - pip install -r requirements-dev.txt - - name: pytest_integration - run: pytest tests/integration_tests/ --ignore-glob='*sonar*.py' + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + - name: Install dependencies + run: | + sudo apt-get install libsndfile1 + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install -r requirements-dev.txt + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: false + + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: true + - name: pytest_integration + run: pytest -s -v tests/integration_tests/ --ignore-glob='*sonar*.py' diff --git a/CODE_LICENSE.md b/CODE_LICENSE.md new file mode 100644 index 0000000..00ab1b5 --- /dev/null +++ b/CODE_LICENSE.md @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Meta Research + +Permission is 
hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/LICENSE.md b/LICENSE.md index 2a96631..8d65a07 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,408 +1,70 @@ -LICENSE: CC-BY-NC-4.0 +SONAR code is released under the MIT license (see [CODE_LICENSE](CODE_LICENSE.md)). 
+
+Some of the SONAR models are also released under the MIT license:
+
+| lang_code | language | url |
+| --------- | ---------------- | ------------------------------------------------------ |
+| arb | modern standard arabic | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.arb.pt |
+| cat | catalan | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.cat.pt |
+| cym | welsh | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.cym.pt |
+| dan | danish | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.dan.pt |
+| deu | german | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.deu.pt |
+| est | estonian | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.est.pt |
+| fin | finnish | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.fin.pt |
+| fra | french | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.fra.pt |
+| ind | indonesian | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.ind.pt |
+| ita | italian | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.ita.pt |
+| kor | korean | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.kor.pt |
+| nld | dutch | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.nld.pt |
+| pes | western persian | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.pes.pt |
+| por | portuguese | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.por.pt |
+| ron | romanian | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.ron.pt |
+| spa | spanish | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.spa.pt |
+| swe | swedish | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.swe.pt |
+| swh | swahili | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.swh.pt |
+| tgl | tagalog | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.tgl.pt |
+| tur | turkish | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.tur.pt |
+| uzn | northern uzbek | https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.uzn.pt |
+
+
+BUT BEWARE, the following models are released under a non-commercial license (see [NC_MODEL_LICENSE](NC_MODEL_LICENSE.md)):
+
+| lang_code | language | url |
+| --------- | ---------------- | 
------------------------------------------------------ |
+| asm | assamese | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.asm.pt |
+| bel | belarusian | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.bel.pt |
+| ben | bengali | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.ben.pt |
+| bos | bosnian | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.bos.pt |
+| bul | bulgarian | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.bul.pt |
+| ces | czech | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.ces.pt |
+| cmn | mandarin chinese | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.cmn.pt |
+| guj | gujarati | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.guj.pt |
+| heb | hebrew | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.heb.pt |
+| hin | hindi | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.hin.pt |
+| hrv | croatian | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.hrv.pt |
+| jpn | japanese | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.jpn.pt |
+| kan | kannada | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.kan.pt |
+| lao | lao | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.lao.pt |
+| lit | lithuanian | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.lit.pt |
+| lvs | standard latvian | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.lvs.pt |
+| mal | malayalam | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.mal.pt |
+| mar | marathi | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.mar.pt |
+| mkd | macedonian | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.mkd.pt |
+| mlt | maltese | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.mlt.pt |
+| npi | nepali | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.npi.pt |
+| ory | odia | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.ory.pt |
+| pan | punjabi | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.pan.pt |
+| pol | polish | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.pol.pt |
+| rus | russian | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.rus.pt |
+| slk | slovak | 
https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.slk.pt | +| slv | slovenian | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.slv.pt | +| snd | sindhi | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.snd.pt | +| srp | serbian | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.srp.pt | +| tam | tamil | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.tam.pt | +| tel | telugu | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.tel.pt | +| tha | thai | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.tha.pt | +| ukr | ukrainian | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.ukr.pt | +| urd | urdu | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.urd.pt | +| vie | vietnamese | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.vie.pt | +| yue | yue | https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.yue.pt | -Copyright (c) Meta Platforms, Inc. and affiliates. -All rights reserved. - -This source code is licensed under the license found in the -LICENSE file in the root directory of this source tree. - - -Attribution-NonCommercial 4.0 International - -======================================================================= - -Creative Commons Corporation ("Creative Commons") is not a law firm and -does not provide legal services or legal advice. Distribution of -Creative Commons public licenses does not create a lawyer-client or -other relationship. Creative Commons makes its licenses and related -information available on an "as-is" basis. Creative Commons gives no -warranties regarding its licenses, any material licensed under their -terms and conditions, or any related information. Creative Commons -disclaims all liability for damages resulting from their use to the -fullest extent possible. 
- -Using Creative Commons Public Licenses - -Creative Commons public licenses provide a standard set of terms and -conditions that creators and other rights holders may use to share -original works of authorship and other material subject to copyright -and certain other rights specified in the public license below. The -following considerations are for informational purposes only, are not -exhaustive, and do not form part of our licenses. - - Considerations for licensors: Our public licenses are - intended for use by those authorized to give the public - permission to use material in ways otherwise restricted by - copyright and certain other rights. Our licenses are - irrevocable. Licensors should read and understand the terms - and conditions of the license they choose before applying it. - Licensors should also secure all rights necessary before - applying our licenses so that the public can reuse the - material as expected. Licensors should clearly mark any - material not subject to the license. This includes other CC- - licensed material, or material used under an exception or - limitation to copyright. More considerations for licensors: - wiki.creativecommons.org/Considerations_for_licensors - - Considerations for the public: By using one of our public - licenses, a licensor grants the public permission to use the - licensed material under specified terms and conditions. If - the licensor's permission is not necessary for any reason--for - example, because of any applicable exception or limitation to - copyright--then that use is not regulated by the license. Our - licenses grant only permissions under copyright and certain - other rights that a licensor has authority to grant. Use of - the licensed material may still be restricted for other - reasons, including because others have copyright or other - rights in the material. A licensor may make special requests, - such as asking that all changes be marked or described. 
- Although not required by our licenses, you are encouraged to - respect those requests where reasonable. More_considerations - for the public: - wiki.creativecommons.org/Considerations_for_licensees - -======================================================================= - -Creative Commons Attribution-NonCommercial 4.0 International Public -License - -By exercising the Licensed Rights (defined below), You accept and agree -to be bound by the terms and conditions of this Creative Commons -Attribution-NonCommercial 4.0 International Public License ("Public -License"). To the extent this Public License may be interpreted as a -contract, You are granted the Licensed Rights in consideration of Your -acceptance of these terms and conditions, and the Licensor grants You -such rights in consideration of benefits the Licensor receives from -making the Licensed Material available under these terms and -conditions. - -Section 1 -- Definitions. - - a. Adapted Material means material subject to Copyright and Similar - Rights that is derived from or based upon the Licensed Material - and in which the Licensed Material is translated, altered, - arranged, transformed, or otherwise modified in a manner requiring - permission under the Copyright and Similar Rights held by the - Licensor. For purposes of this Public License, where the Licensed - Material is a musical work, performance, or sound recording, - Adapted Material is always produced where the Licensed Material is - synched in timed relation with a moving image. - - b. Adapter's License means the license You apply to Your Copyright - and Similar Rights in Your contributions to Adapted Material in - accordance with the terms and conditions of this Public License. - - c. 
Copyright and Similar Rights means copyright and/or similar rights - closely related to copyright including, without limitation, - performance, broadcast, sound recording, and Sui Generis Database - Rights, without regard to how the rights are labeled or - categorized. For purposes of this Public License, the rights - specified in Section 2(b)(1)-(2) are not Copyright and Similar - Rights. - d. Effective Technological Measures means those measures that, in the - absence of proper authority, may not be circumvented under laws - fulfilling obligations under Article 11 of the WIPO Copyright - Treaty adopted on December 20, 1996, and/or similar international - agreements. - - e. Exceptions and Limitations means fair use, fair dealing, and/or - any other exception or limitation to Copyright and Similar Rights - that applies to Your use of the Licensed Material. - - f. Licensed Material means the artistic or literary work, database, - or other material to which the Licensor applied this Public - License. - - g. Licensed Rights means the rights granted to You subject to the - terms and conditions of this Public License, which are limited to - all Copyright and Similar Rights that apply to Your use of the - Licensed Material and that the Licensor has authority to license. - - h. Licensor means the individual(s) or entity(ies) granting rights - under this Public License. - - i. NonCommercial means not primarily intended for or directed towards - commercial advantage or monetary compensation. For purposes of - this Public License, the exchange of the Licensed Material for - other material subject to Copyright and Similar Rights by digital - file-sharing or similar means is NonCommercial provided there is - no payment of monetary compensation in connection with the - exchange. - - j. 
Share means to provide material to the public by any means or - process that requires permission under the Licensed Rights, such - as reproduction, public display, public performance, distribution, - dissemination, communication, or importation, and to make material - available to the public including in ways that members of the - public may access the material from a place and at a time - individually chosen by them. - - k. Sui Generis Database Rights means rights other than copyright - resulting from Directive 96/9/EC of the European Parliament and of - the Council of 11 March 1996 on the legal protection of databases, - as amended and/or succeeded, as well as other essentially - equivalent rights anywhere in the world. - - l. You means the individual or entity exercising the Licensed Rights - under this Public License. Your has a corresponding meaning. - -Section 2 -- Scope. - - a. License grant. - - 1. Subject to the terms and conditions of this Public License, - the Licensor hereby grants You a worldwide, royalty-free, - non-sublicensable, non-exclusive, irrevocable license to - exercise the Licensed Rights in the Licensed Material to: - - a. reproduce and Share the Licensed Material, in whole or - in part, for NonCommercial purposes only; and - - b. produce, reproduce, and Share Adapted Material for - NonCommercial purposes only. - - 2. Exceptions and Limitations. For the avoidance of doubt, where - Exceptions and Limitations apply to Your use, this Public - License does not apply, and You do not need to comply with - its terms and conditions. - - 3. Term. The term of this Public License is specified in Section - 6(a). - - 4. Media and formats; technical modifications allowed. The - Licensor authorizes You to exercise the Licensed Rights in - all media and formats whether now known or hereafter created, - and to make technical modifications necessary to do so. 
The - Licensor waives and/or agrees not to assert any right or - authority to forbid You from making technical modifications - necessary to exercise the Licensed Rights, including - technical modifications necessary to circumvent Effective - Technological Measures. For purposes of this Public License, - simply making modifications authorized by this Section 2(a) - (4) never produces Adapted Material. - - 5. Downstream recipients. - - a. Offer from the Licensor -- Licensed Material. Every - recipient of the Licensed Material automatically - receives an offer from the Licensor to exercise the - Licensed Rights under the terms and conditions of this - Public License. - - b. No downstream restrictions. You may not offer or impose - any additional or different terms or conditions on, or - apply any Effective Technological Measures to, the - Licensed Material if doing so restricts exercise of the - Licensed Rights by any recipient of the Licensed - Material. - - 6. No endorsement. Nothing in this Public License constitutes or - may be construed as permission to assert or imply that You - are, or that Your use of the Licensed Material is, connected - with, or sponsored, endorsed, or granted official status by, - the Licensor or others designated to receive attribution as - provided in Section 3(a)(1)(A)(i). - - b. Other rights. - - 1. Moral rights, such as the right of integrity, are not - licensed under this Public License, nor are publicity, - privacy, and/or other similar personality rights; however, to - the extent possible, the Licensor waives and/or agrees not to - assert any such rights held by the Licensor to the limited - extent necessary to allow You to exercise the Licensed - Rights, but not otherwise. - - 2. Patent and trademark rights are not licensed under this - Public License. - - 3. 
To the extent possible, the Licensor waives any right to - collect royalties from You for the exercise of the Licensed - Rights, whether directly or through a collecting society - under any voluntary or waivable statutory or compulsory - licensing scheme. In all other cases the Licensor expressly - reserves any right to collect such royalties, including when - the Licensed Material is used other than for NonCommercial - purposes. - -Section 3 -- License Conditions. - -Your exercise of the Licensed Rights is expressly made subject to the -following conditions. - - a. Attribution. - - 1. If You Share the Licensed Material (including in modified - form), You must: - - a. retain the following if it is supplied by the Licensor - with the Licensed Material: - - i. identification of the creator(s) of the Licensed - Material and any others designated to receive - attribution, in any reasonable manner requested by - the Licensor (including by pseudonym if - designated); - - ii. a copyright notice; - - iii. a notice that refers to this Public License; - - iv. a notice that refers to the disclaimer of - warranties; - - v. a URI or hyperlink to the Licensed Material to the - extent reasonably practicable; - - b. indicate if You modified the Licensed Material and - retain an indication of any previous modifications; and - - c. indicate the Licensed Material is licensed under this - Public License, and include the text of, or the URI or - hyperlink to, this Public License. - - 2. You may satisfy the conditions in Section 3(a)(1) in any - reasonable manner based on the medium, means, and context in - which You Share the Licensed Material. For example, it may be - reasonable to satisfy the conditions by providing a URI or - hyperlink to a resource that includes the required - information. - - 3. If requested by the Licensor, You must remove any of the - information required by Section 3(a)(1)(A) to the extent - reasonably practicable. - - 4. 
If You Share Adapted Material You produce, the Adapter's - License You apply must not prevent recipients of the Adapted - Material from complying with this Public License. - -Section 4 -- Sui Generis Database Rights. - -Where the Licensed Rights include Sui Generis Database Rights that -apply to Your use of the Licensed Material: - - a. for the avoidance of doubt, Section 2(a)(1) grants You the right - to extract, reuse, reproduce, and Share all or a substantial - portion of the contents of the database for NonCommercial purposes - only; - - b. if You include all or a substantial portion of the database - contents in a database in which You have Sui Generis Database - Rights, then the database in which You have Sui Generis Database - Rights (but not its individual contents) is Adapted Material; and - - c. You must comply with the conditions in Section 3(a) if You Share - all or a substantial portion of the contents of the database. - -For the avoidance of doubt, this Section 4 supplements and does not -replace Your obligations under this Public License where the Licensed -Rights include other Copyright and Similar Rights. - -Section 5 -- Disclaimer of Warranties and Limitation of Liability. - - a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE - EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS - AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF - ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, - IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, - WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR - PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, - ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT - KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT - ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. - - b. 
TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE - TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, - NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, - INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, - COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR - USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN - ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR - DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR - IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. - - c. The disclaimer of warranties and limitation of liability provided - above shall be interpreted in a manner that, to the extent - possible, most closely approximates an absolute disclaimer and - waiver of all liability. - -Section 6 -- Term and Termination. - - a. This Public License applies for the term of the Copyright and - Similar Rights licensed here. However, if You fail to comply with - this Public License, then Your rights under this Public License - terminate automatically. - - b. Where Your right to use the Licensed Material has terminated under - Section 6(a), it reinstates: - - 1. automatically as of the date the violation is cured, provided - it is cured within 30 days of Your discovery of the - violation; or - - 2. upon express reinstatement by the Licensor. - - For the avoidance of doubt, this Section 6(b) does not affect any - right the Licensor may have to seek remedies for Your violations - of this Public License. - - c. For the avoidance of doubt, the Licensor may also offer the - Licensed Material under separate terms or conditions or stop - distributing the Licensed Material at any time; however, doing so - will not terminate this Public License. - - d. Sections 1, 5, 6, 7, and 8 survive termination of this Public - License. - -Section 7 -- Other Terms and Conditions. - - a. 
The Licensor shall not be bound by any additional or different - terms or conditions communicated by You unless expressly agreed. - - b. Any arrangements, understandings, or agreements regarding the - Licensed Material not stated herein are separate from and - independent of the terms and conditions of this Public License. - -Section 8 -- Interpretation. - - a. For the avoidance of doubt, this Public License does not, and - shall not be interpreted to, reduce, limit, restrict, or impose - conditions on any use of the Licensed Material that could lawfully - be made without permission under this Public License. - - b. To the extent possible, if any provision of this Public License is - deemed unenforceable, it shall be automatically reformed to the - minimum extent necessary to make it enforceable. If the provision - cannot be reformed, it shall be severed from this Public License - without affecting the enforceability of the remaining terms and - conditions. - - c. No term or condition of this Public License will be waived and no - failure to comply consented to unless expressly agreed to by the - Licensor. - - d. Nothing in this Public License constitutes or may be interpreted - as a limitation upon, or waiver of, any privileges and immunities - that apply to the Licensor or You, including from the legal - processes of any jurisdiction or authority. - -======================================================================= - -Creative Commons is not a party to its public -licenses. Notwithstanding, Creative Commons may elect to apply one of -its public licenses to material it publishes and in those instances -will be considered the “Licensor.” The text of the Creative Commons -public licenses is dedicated to the public domain under the CC0 Public -Domain Dedication. 
Except for the limited purpose of indicating that -material is shared under a Creative Commons public license or as -otherwise permitted by the Creative Commons policies published at -creativecommons.org/policies, Creative Commons does not authorize the -use of the trademark "Creative Commons" or any other trademark or logo -of Creative Commons without its prior written consent including, -without limitation, in connection with any unauthorized modifications -to any of its public licenses or any other arrangements, -understandings, or agreements concerning use of licensed material. For -the avoidance of doubt, this paragraph does not form part of the -public licenses. - -Creative Commons may be contacted at creativecommons.org. diff --git a/NC_MODEL_LICENSE.md b/NC_MODEL_LICENSE.md new file mode 100644 index 0000000..2a96631 --- /dev/null +++ b/NC_MODEL_LICENSE.md @@ -0,0 +1,408 @@ +LICENSE: CC-BY-NC-4.0 + +Copyright (c) Meta Platforms, Inc. and affiliates. +All rights reserved. + +This source code is licensed under the license found in the +LICENSE file in the root directory of this source tree. + + +Attribution-NonCommercial 4.0 International + +======================================================================= + +Creative Commons Corporation ("Creative Commons") is not a law firm and +does not provide legal services or legal advice. Distribution of +Creative Commons public licenses does not create a lawyer-client or +other relationship. Creative Commons makes its licenses and related +information available on an "as-is" basis. Creative Commons gives no +warranties regarding its licenses, any material licensed under their +terms and conditions, or any related information. Creative Commons +disclaims all liability for damages resulting from their use to the +fullest extent possible. 
+ +Using Creative Commons Public Licenses + +Creative Commons public licenses provide a standard set of terms and +conditions that creators and other rights holders may use to share +original works of authorship and other material subject to copyright +and certain other rights specified in the public license below. The +following considerations are for informational purposes only, are not +exhaustive, and do not form part of our licenses. + + Considerations for licensors: Our public licenses are + intended for use by those authorized to give the public + permission to use material in ways otherwise restricted by + copyright and certain other rights. Our licenses are + irrevocable. Licensors should read and understand the terms + and conditions of the license they choose before applying it. + Licensors should also secure all rights necessary before + applying our licenses so that the public can reuse the + material as expected. Licensors should clearly mark any + material not subject to the license. This includes other CC- + licensed material, or material used under an exception or + limitation to copyright. More considerations for licensors: + wiki.creativecommons.org/Considerations_for_licensors + + Considerations for the public: By using one of our public + licenses, a licensor grants the public permission to use the + licensed material under specified terms and conditions. If + the licensor's permission is not necessary for any reason--for + example, because of any applicable exception or limitation to + copyright--then that use is not regulated by the license. Our + licenses grant only permissions under copyright and certain + other rights that a licensor has authority to grant. Use of + the licensed material may still be restricted for other + reasons, including because others have copyright or other + rights in the material. A licensor may make special requests, + such as asking that all changes be marked or described. 
+ Although not required by our licenses, you are encouraged to + respect those requests where reasonable. More_considerations + for the public: + wiki.creativecommons.org/Considerations_for_licensees + +======================================================================= + +Creative Commons Attribution-NonCommercial 4.0 International Public +License + +By exercising the Licensed Rights (defined below), You accept and agree +to be bound by the terms and conditions of this Creative Commons +Attribution-NonCommercial 4.0 International Public License ("Public +License"). To the extent this Public License may be interpreted as a +contract, You are granted the Licensed Rights in consideration of Your +acceptance of these terms and conditions, and the Licensor grants You +such rights in consideration of benefits the Licensor receives from +making the Licensed Material available under these terms and +conditions. + +Section 1 -- Definitions. + + a. Adapted Material means material subject to Copyright and Similar + Rights that is derived from or based upon the Licensed Material + and in which the Licensed Material is translated, altered, + arranged, transformed, or otherwise modified in a manner requiring + permission under the Copyright and Similar Rights held by the + Licensor. For purposes of this Public License, where the Licensed + Material is a musical work, performance, or sound recording, + Adapted Material is always produced where the Licensed Material is + synched in timed relation with a moving image. + + b. Adapter's License means the license You apply to Your Copyright + and Similar Rights in Your contributions to Adapted Material in + accordance with the terms and conditions of this Public License. + + c. 
Copyright and Similar Rights means copyright and/or similar rights + closely related to copyright including, without limitation, + performance, broadcast, sound recording, and Sui Generis Database + Rights, without regard to how the rights are labeled or + categorized. For purposes of this Public License, the rights + specified in Section 2(b)(1)-(2) are not Copyright and Similar + Rights. + d. Effective Technological Measures means those measures that, in the + absence of proper authority, may not be circumvented under laws + fulfilling obligations under Article 11 of the WIPO Copyright + Treaty adopted on December 20, 1996, and/or similar international + agreements. + + e. Exceptions and Limitations means fair use, fair dealing, and/or + any other exception or limitation to Copyright and Similar Rights + that applies to Your use of the Licensed Material. + + f. Licensed Material means the artistic or literary work, database, + or other material to which the Licensor applied this Public + License. + + g. Licensed Rights means the rights granted to You subject to the + terms and conditions of this Public License, which are limited to + all Copyright and Similar Rights that apply to Your use of the + Licensed Material and that the Licensor has authority to license. + + h. Licensor means the individual(s) or entity(ies) granting rights + under this Public License. + + i. NonCommercial means not primarily intended for or directed towards + commercial advantage or monetary compensation. For purposes of + this Public License, the exchange of the Licensed Material for + other material subject to Copyright and Similar Rights by digital + file-sharing or similar means is NonCommercial provided there is + no payment of monetary compensation in connection with the + exchange. + + j. 
Share means to provide material to the public by any means or + process that requires permission under the Licensed Rights, such + as reproduction, public display, public performance, distribution, + dissemination, communication, or importation, and to make material + available to the public including in ways that members of the + public may access the material from a place and at a time + individually chosen by them. + + k. Sui Generis Database Rights means rights other than copyright + resulting from Directive 96/9/EC of the European Parliament and of + the Council of 11 March 1996 on the legal protection of databases, + as amended and/or succeeded, as well as other essentially + equivalent rights anywhere in the world. + + l. You means the individual or entity exercising the Licensed Rights + under this Public License. Your has a corresponding meaning. + +Section 2 -- Scope. + + a. License grant. + + 1. Subject to the terms and conditions of this Public License, + the Licensor hereby grants You a worldwide, royalty-free, + non-sublicensable, non-exclusive, irrevocable license to + exercise the Licensed Rights in the Licensed Material to: + + a. reproduce and Share the Licensed Material, in whole or + in part, for NonCommercial purposes only; and + + b. produce, reproduce, and Share Adapted Material for + NonCommercial purposes only. + + 2. Exceptions and Limitations. For the avoidance of doubt, where + Exceptions and Limitations apply to Your use, this Public + License does not apply, and You do not need to comply with + its terms and conditions. + + 3. Term. The term of this Public License is specified in Section + 6(a). + + 4. Media and formats; technical modifications allowed. The + Licensor authorizes You to exercise the Licensed Rights in + all media and formats whether now known or hereafter created, + and to make technical modifications necessary to do so. 
The + Licensor waives and/or agrees not to assert any right or + authority to forbid You from making technical modifications + necessary to exercise the Licensed Rights, including + technical modifications necessary to circumvent Effective + Technological Measures. For purposes of this Public License, + simply making modifications authorized by this Section 2(a) + (4) never produces Adapted Material. + + 5. Downstream recipients. + + a. Offer from the Licensor -- Licensed Material. Every + recipient of the Licensed Material automatically + receives an offer from the Licensor to exercise the + Licensed Rights under the terms and conditions of this + Public License. + + b. No downstream restrictions. You may not offer or impose + any additional or different terms or conditions on, or + apply any Effective Technological Measures to, the + Licensed Material if doing so restricts exercise of the + Licensed Rights by any recipient of the Licensed + Material. + + 6. No endorsement. Nothing in this Public License constitutes or + may be construed as permission to assert or imply that You + are, or that Your use of the Licensed Material is, connected + with, or sponsored, endorsed, or granted official status by, + the Licensor or others designated to receive attribution as + provided in Section 3(a)(1)(A)(i). + + b. Other rights. + + 1. Moral rights, such as the right of integrity, are not + licensed under this Public License, nor are publicity, + privacy, and/or other similar personality rights; however, to + the extent possible, the Licensor waives and/or agrees not to + assert any such rights held by the Licensor to the limited + extent necessary to allow You to exercise the Licensed + Rights, but not otherwise. + + 2. Patent and trademark rights are not licensed under this + Public License. + + 3. 
To the extent possible, the Licensor waives any right to + collect royalties from You for the exercise of the Licensed + Rights, whether directly or through a collecting society + under any voluntary or waivable statutory or compulsory + licensing scheme. In all other cases the Licensor expressly + reserves any right to collect such royalties, including when + the Licensed Material is used other than for NonCommercial + purposes. + +Section 3 -- License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the +following conditions. + + a. Attribution. + + 1. If You Share the Licensed Material (including in modified + form), You must: + + a. retain the following if it is supplied by the Licensor + with the Licensed Material: + + i. identification of the creator(s) of the Licensed + Material and any others designated to receive + attribution, in any reasonable manner requested by + the Licensor (including by pseudonym if + designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of + warranties; + + v. a URI or hyperlink to the Licensed Material to the + extent reasonably practicable; + + b. indicate if You modified the Licensed Material and + retain an indication of any previous modifications; and + + c. indicate the Licensed Material is licensed under this + Public License, and include the text of, or the URI or + hyperlink to, this Public License. + + 2. You may satisfy the conditions in Section 3(a)(1) in any + reasonable manner based on the medium, means, and context in + which You Share the Licensed Material. For example, it may be + reasonable to satisfy the conditions by providing a URI or + hyperlink to a resource that includes the required + information. + + 3. If requested by the Licensor, You must remove any of the + information required by Section 3(a)(1)(A) to the extent + reasonably practicable. + + 4. 
If You Share Adapted Material You produce, the Adapter's + License You apply must not prevent recipients of the Adapted + Material from complying with this Public License. + +Section 4 -- Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that +apply to Your use of the Licensed Material: + + a. for the avoidance of doubt, Section 2(a)(1) grants You the right + to extract, reuse, reproduce, and Share all or a substantial + portion of the contents of the database for NonCommercial purposes + only; + + b. if You include all or a substantial portion of the database + contents in a database in which You have Sui Generis Database + Rights, then the database in which You have Sui Generis Database + Rights (but not its individual contents) is Adapted Material; and + + c. You must comply with the conditions in Section 3(a) if You Share + all or a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not +replace Your obligations under this Public License where the Licensed +Rights include other Copyright and Similar Rights. + +Section 5 -- Disclaimer of Warranties and Limitation of Liability. + + a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE + EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS + AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF + ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, + IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, + WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, + ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT + KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT + ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. + + b. 
TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE + TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, + NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, + INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, + COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR + USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN + ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR + DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR + IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. + + c. The disclaimer of warranties and limitation of liability provided + above shall be interpreted in a manner that, to the extent + possible, most closely approximates an absolute disclaimer and + waiver of all liability. + +Section 6 -- Term and Termination. + + a. This Public License applies for the term of the Copyright and + Similar Rights licensed here. However, if You fail to comply with + this Public License, then Your rights under this Public License + terminate automatically. + + b. Where Your right to use the Licensed Material has terminated under + Section 6(a), it reinstates: + + 1. automatically as of the date the violation is cured, provided + it is cured within 30 days of Your discovery of the + violation; or + + 2. upon express reinstatement by the Licensor. + + For the avoidance of doubt, this Section 6(b) does not affect any + right the Licensor may have to seek remedies for Your violations + of this Public License. + + c. For the avoidance of doubt, the Licensor may also offer the + Licensed Material under separate terms or conditions or stop + distributing the Licensed Material at any time; however, doing so + will not terminate this Public License. + + d. Sections 1, 5, 6, 7, and 8 survive termination of this Public + License. + +Section 7 -- Other Terms and Conditions. + + a. 
The Licensor shall not be bound by any additional or different + terms or conditions communicated by You unless expressly agreed. + + b. Any arrangements, understandings, or agreements regarding the + Licensed Material not stated herein are separate from and + independent of the terms and conditions of this Public License. + +Section 8 -- Interpretation. + + a. For the avoidance of doubt, this Public License does not, and + shall not be interpreted to, reduce, limit, restrict, or impose + conditions on any use of the Licensed Material that could lawfully + be made without permission under this Public License. + + b. To the extent possible, if any provision of this Public License is + deemed unenforceable, it shall be automatically reformed to the + minimum extent necessary to make it enforceable. If the provision + cannot be reformed, it shall be severed from this Public License + without affecting the enforceability of the remaining terms and + conditions. + + c. No term or condition of this Public License will be waived and no + failure to comply consented to unless expressly agreed to by the + Licensor. + + d. Nothing in this Public License constitutes or may be interpreted + as a limitation upon, or waiver of, any privileges and immunities + that apply to the Licensor or You, including from the legal + processes of any jurisdiction or authority. + +======================================================================= + +Creative Commons is not a party to its public +licenses. Notwithstanding, Creative Commons may elect to apply one of +its public licenses to material it publishes and in those instances +will be considered the “Licensor.” The text of the Creative Commons +public licenses is dedicated to the public domain under the CC0 Public +Domain Dedication. 
Except for the limited purpose of indicating that +material is shared under a Creative Commons public license or as +otherwise permitted by the Creative Commons policies published at +creativecommons.org/policies, Creative Commons does not authorize the +use of the trademark "Creative Commons" or any other trademark or logo +of Creative Commons without its prior written consent including, +without limitation, in connection with any unauthorized modifications +to any of its public licenses or any other arrangements, +understandings, or agreements concerning use of licensed material. For +the avoidance of doubt, this paragraph does not form part of the +public licenses. + +Creative Commons may be contacted at creativecommons.org. diff --git a/README.md b/README.md index 456ac66..67f6b54 100644 --- a/README.md +++ b/README.md @@ -160,42 +160,62 @@ All 200 languages from the [No Language Left Behind project](https://arxiv.org/a | lang_code | language | link | | --------- | ---------------- | ------------------------------------------------------------------ | | arb | afrikaans | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.arb.pt) | -| ben | bengali | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.ben.pt) | +| asm | assamese | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.asm.pt) | +| bel | belarussian | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.bel.pt) | +| ben | bengali | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.ben.pt) | +| bos | bosnian | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.bos.pt) | +| bul | bulgarian | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.bul.pt) | | cat | catalan | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.cat.pt) | -| ces | czech | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.ces.pt) | -| cmn | mandarin chinese | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.cmn.pt) | +| ces | czech | 
[download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.ces.pt) |
+| cmn | mandarin chinese | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.cmn.pt) |
| cym | welsh | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.cym.pt) |
| dan | danish | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.dan.pt) |
| deu | german | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.deu.pt) |
| est | estonian | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.est.pt) |
| fin | finnish | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.fin.pt) |
| fra | french | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.fra.pt) |
-| hin | hindi | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.hin.pt) |
+| guj | gujarati | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.guj.pt) |
+| heb | hebrew | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.heb.pt) |
+| hin | hindi | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.hin.pt) |
+| hrv | croatian | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.hrv.pt) |
| ind | indonesian | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.ind.pt) |
| ita | italian | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.ita.pt) |
-| jpn | japanse | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.jpn.pt) |
-| kan | kannada | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.jan.pt) |
+| jpn | japanese | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.jpn.pt) |
+| kan | kannada | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.kan.pt) |
| kor | korean | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.kor.pt) |
-| mlt | maltese | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.mlt.pt) |
+| lao | lao | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.lao.pt) |
+| lit | lithuanian | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.lit.pt) |
+|
lvs | standard latvian | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.lvs.pt) |
+| mal | malayalam | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.mal.pt) |
+| mar | marathi | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.mar.pt) |
+| mkd | macedonian | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.mkd.pt) |
+| mlt | maltese | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.mlt.pt) |
+| npi | nepali | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.npi.pt) |
| nld | dutch | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.nld.pt) |
+| ory | odia | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.ory.pt) |
+| pan | punjabi | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.pan.pt) |
| pes | western persian | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.pes.pt) |
-| pol | polish | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.po.pt) |
+| pol | polish | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.pol.pt) |
| por | portuguese | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.por.pt) |
| ron | romanian | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.ron.pt) |
-| rus | russian | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.rus.pt) |
-| slk | slovak | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.slk.pt) |
+| rus | russian | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.rus.pt) |
+| slk | slovak | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.slk.pt) |
+| slv | slovenian | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.slv.pt) |
+| snd | sindhi | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.snd.pt) |
+| srp | serbian | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.srp.pt) |
| spa | spanish | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.spa.pt) |
| swe | swedish |
[download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.swe.pt) | | swh | swahili | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.swh.pt) | -| tam | tamil | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.tam.pt) | -| tel | telugu | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.tel.pt) | +| tam | tamil | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.tam.pt) | +| tel | telugu | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.tel.pt) | | tgl | tagalog | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.tgl.pt) | -| tha | thai | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.tha.pt) | +| tha | thai | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.tha.pt) | | tur | turkish | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.tur.pt) | -| ukr | ukrainian | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.ukr.pt) | -| urd | urdu | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.urd.pt) | +| ukr | ukrainian | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.ukr.pt) | +| urd | urdu | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.urd.pt) | | uzn | northern uzbek | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.uzn.pt) | -| vie | vietnamese | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.vie.pt) | +| vie | vietnamese | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.vie.pt) | +| yue | yue | [download](https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.yue.pt) | diff --git a/pyproject.toml b/pyproject.toml index 1e4ad3c..d7cd2d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,12 +1,7 @@ [build-system] -requires = ["flit_core >=3.2,<4", - "setuptools", - "cmake~=3.26.4", - "ninja~=1.11.1" - ] +requires = ["flit_core>=3.2,<4", "setuptools"] build-backend = "flit_core.buildapi" -# tbb-devel==2021.9.0 # wheel~=0.40.0 [project] @@ -29,10 +24,13 @@ classifiers=[ ] dependencies = [ - 
"fairseq2==0.1.0", + "fairseq2==0.2.*", "numpy>=1.21", "torch", - "torchaudio" + "torchaudio", + "sox", + "soundfile", + "typing_extensions", ] [project.optional-dependencies] @@ -68,6 +66,12 @@ extend-exclude = ''' ) ''' +[tool.flake8] +extend_ignore = ["E", "Y"] # Black +per-file-ignores = [ + "__init__.py:F401", +] + [tool.isort] profile = "black" skip_gitignore = true @@ -87,4 +91,4 @@ minversion = "6.0" testpaths = ["tests/"] python_files = [ "test_*.py" -] \ No newline at end of file +] diff --git a/requirements.txt b/requirements.txt index 73149a0..060002c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ -fairseq2==0.1.0 +fairseq2==0.2.* numpy>=1.21 torch torchaudio -tqdm \ No newline at end of file +sox +soundfile +tqdm diff --git a/sonar/__init__.py b/sonar/__init__.py index df47984..b572e0a 100644 --- a/sonar/__init__.py +++ b/sonar/__init__.py @@ -8,4 +8,19 @@ """ -__version__ = "0.1.0" +from pathlib import Path + +from fairseq2.assets import FileAssetMetadataProvider, asset_store + +__version__ = "0.2.0" + + +def _update_asset_store() -> None: + cards_dir = Path(__file__).parent.joinpath("cards") + + # Make sure that the default fairseq2 asset store can resolve cards under + # the directory /cards. + asset_store.metadata_providers.append(FileAssetMetadataProvider(cards_dir)) + + +_update_asset_store() diff --git a/sonar/store/cards/blaser_2_0_ref.yaml b/sonar/cards/blaser_2_0.yaml similarity index 70% rename from sonar/store/cards/blaser_2_0_ref.yaml rename to sonar/cards/blaser_2_0.yaml index da0c228..505a7bf 100644 --- a/sonar/store/cards/blaser_2_0_ref.yaml +++ b/sonar/cards/blaser_2_0.yaml @@ -3,7 +3,15 @@ # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
+ name: blaser_2_0_ref model_type: blaser model_arch: basic_ref checkpoint: "https://huggingface.co/facebook/blaser-2.0-ref/resolve/main/model.pt" + +--- + +name: blaser_2_0_qe +model_type: blaser +model_arch: basic_qe +checkpoint: "https://huggingface.co/facebook/blaser-2.0-qe/resolve/main/model.pt" diff --git a/sonar/store/cards/laser2_text_encoder.yaml b/sonar/cards/laser2_text_encoder.yaml similarity index 92% rename from sonar/store/cards/laser2_text_encoder.yaml rename to sonar/cards/laser2_text_encoder.yaml index 8113d35..d47ab69 100644 --- a/sonar/store/cards/laser2_text_encoder.yaml +++ b/sonar/cards/laser2_text_encoder.yaml @@ -3,8 +3,9 @@ # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -name: laser2 + +name: laser2_text_encoder model_type: lstm model_arch: laser2 checkpoint: "https://dl.fbaipublicfiles.com/nllb/laser/laser2.pt" -tokenizer: "https://dl.fbaipublicfiles.com/nllb/laser/laser2.spm" \ No newline at end of file +tokenizer: "https://dl.fbaipublicfiles.com/nllb/laser/laser2.spm" diff --git a/sonar/cards/sonar_speech_encoder.yaml b/sonar/cards/sonar_speech_encoder.yaml new file mode 100644 index 0000000..35ed585 --- /dev/null +++ b/sonar/cards/sonar_speech_encoder.yaml @@ -0,0 +1,523 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +name: sonar_speech_encoder_base +model_type: sonar_speech +model_arch: non_english + +--- + +name: sonar_speech_encoder_arb +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.arb.pt" +default_lang: arb +langs: + - arb + +--- + +name: sonar_speech_encoder_cat +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.cat.pt" +default_lang: cat +langs: + - cat + +--- + +name: sonar_speech_encoder_cym +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.cym.pt" +default_lang: cym +langs: + - cym + +--- + +name: sonar_speech_encoder_dan +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.dan.pt" +default_lang: dan +langs: + - dan + +--- + +name: sonar_speech_encoder_deu +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.deu.pt" +default_lang: deu +langs: + - deu + +--- + +name: sonar_speech_encoder_eng +base: sonar_speech_encoder_base +model_arch: english +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.eng.pt" +default_lang: eng +langs: + - eng + +--- + +name: sonar_speech_encoder_est +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.est.pt" +default_lang: est +langs: + - est + +--- + +name: sonar_speech_encoder_fin +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.fin.pt" +default_lang: fin +langs: + - fin + +--- + +name: sonar_speech_encoder_fra +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.fra.pt" +default_lang: fra +langs: + - fra + +--- + +name: sonar_speech_encoder_ind +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.ind.pt" +default_lang: ind +langs: + - ind + +--- + +name: sonar_speech_encoder_ita +base: sonar_speech_encoder_base +checkpoint: 
"https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.ita.pt" +default_lang: ita +langs: + - ita + +--- + +name: sonar_speech_encoder_kor +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.kor.pt" +default_lang: kor +langs: + - kor + +--- + +name: sonar_speech_encoder_nld +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.nld.pt" +default_lang: nld +langs: + - nld + +--- + +name: sonar_speech_encoder_pes +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.pes.pt" +default_lang: pes +langs: + - pes + +--- + +name: sonar_speech_encoder_por +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.por.pt" +default_lang: por +langs: + - por + +--- + +name: sonar_speech_encoder_ron +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.ron.pt" +default_lang: ron +langs: + - ron + +--- + +name: sonar_speech_encoder_spa +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.spa.pt" +default_lang: spa +langs: + - spa + +--- + +name: sonar_speech_encoder_swh +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.swh.pt" +default_lang: swh +langs: + - swh + +--- + +name: sonar_speech_encoder_tgl +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.tgl.pt" +default_lang: tgl +langs: + - tgl + +--- + +name: sonar_speech_encoder_tur +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.tur.pt" +default_lang: tur +langs: + - tur + +--- + +name: sonar_speech_encoder_uzn +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.uzn.pt" +default_lang: uzn +langs: + - uzn + +--- + +name: sonar_speech_encoder_asm +base: sonar_speech_encoder_base +checkpoint: 
"https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.asm.pt" +default_lang: asm +langs: + - asm + +--- + +name: sonar_speech_encoder_bel +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.bel.pt" +default_lang: bel +langs: + - bel + +--- + +name: sonar_speech_encoder_ben +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.ben.pt" +default_lang: ben +langs: + - ben + +--- + +name: sonar_speech_encoder_bos +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.bos.pt" +default_lang: bos +langs: + - bos + +--- + +name: sonar_speech_encoder_bul +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.bul.pt" +default_lang: bul +langs: + - bul + +--- + +name: sonar_speech_encoder_ces +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.ces.pt" +default_lang: ces +langs: + - ces + +--- + +name: sonar_speech_encoder_cmn +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.cmn.pt" +default_lang: cmn +langs: + - cmn + +--- + +name: sonar_speech_encoder_guj +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.guj.pt" +default_lang: guj +langs: + - guj + +--- + +name: sonar_speech_encoder_heb +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.heb.pt" +default_lang: heb +langs: + - heb + +--- + +name: sonar_speech_encoder_hin +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.hin.pt" +default_lang: hin +langs: + - hin + +--- + +name: sonar_speech_encoder_hrv +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.hrv.pt" +default_lang: hrv +langs: + - hrv + +--- + +name: sonar_speech_encoder_jpn +base: sonar_speech_encoder_base +checkpoint: 
"https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.jpn.pt" +default_lang: jpn +langs: + - jpn + +--- + +name: sonar_speech_encoder_kan +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.kan.pt" +default_lang: kan +langs: + - kan + +--- + +name: sonar_speech_encoder_lao +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.lao.pt" +default_lang: lao +langs: + - lao + +--- + +name: sonar_speech_encoder_lit +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.lit.pt" +default_lang: lit +langs: + - lit + +--- + +name: sonar_speech_encoder_lvs +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.lvs.pt" +default_lang: lvs +langs: + - lvs + +--- + +name: sonar_speech_encoder_mal +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.mal.pt" +default_lang: mal +langs: + - mal + +--- + +name: sonar_speech_encoder_mar +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.mar.pt" +default_lang: mar +langs: + - mar + +--- + +name: sonar_speech_encoder_mkd +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.mkd.pt" +default_lang: mkd +langs: + - mkd + +--- + +name: sonar_speech_encoder_mlt +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.mlt.pt" +default_lang: mlt +langs: + - mlt + +--- + +name: sonar_speech_encoder_npi +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.npi.pt" +default_lang: npi +langs: + - npi + +--- + +name: sonar_speech_encoder_ory +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.ory.pt" +default_lang: ory +langs: + - ory + +--- + +name: sonar_speech_encoder_pan +base: sonar_speech_encoder_base +checkpoint: 
"https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.pan.pt" +default_lang: pan +langs: + - pan + +--- + +name: sonar_speech_encoder_pol +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.pol.pt" +default_lang: pol +langs: + - pol + +--- + +name: sonar_speech_encoder_rus +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.rus.pt" +default_lang: rus +langs: + - rus + +--- + +name: sonar_speech_encoder_slk +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.slk.pt" +default_lang: slk +langs: + - slk + +--- + +name: sonar_speech_encoder_slv +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.slv.pt" +default_lang: slv +langs: + - slv + +--- + +name: sonar_speech_encoder_snd +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.snd.pt" +default_lang: snd +langs: + - snd + +--- + +name: sonar_speech_encoder_srp +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.srp.pt" +default_lang: srp +langs: + - srp + +--- + +name: sonar_speech_encoder_tam +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.tam.pt" +default_lang: tam +langs: + - tam + +--- + +name: sonar_speech_encoder_tel +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.tel.pt" +default_lang: tel +langs: + - tel + +--- + +name: sonar_speech_encoder_tha +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.tha.pt" +default_lang: tha +langs: + - tha + +--- + +name: sonar_speech_encoder_ukr +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.ukr.pt" +default_lang: ukr +langs: + - ukr + +--- + +name: sonar_speech_encoder_urd +base: sonar_speech_encoder_base +checkpoint: 
"https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.urd.pt" +default_lang: urd +langs: + - urd + +--- + +name: sonar_speech_encoder_vie +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.vie.pt" +default_lang: vie +langs: + - vie + +--- + +name: sonar_speech_encoder_yue +base: sonar_speech_encoder_base +checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v5ap.yue.pt" +default_lang: yue +langs: + - yue diff --git a/sonar/store/cards/text_sonar_basic_decoder.yaml b/sonar/cards/text_sonar_basic_decoder.yaml similarity index 99% rename from sonar/store/cards/text_sonar_basic_decoder.yaml rename to sonar/cards/text_sonar_basic_decoder.yaml index 98b0ee2..dc9b139 100644 --- a/sonar/store/cards/text_sonar_basic_decoder.yaml +++ b/sonar/cards/text_sonar_basic_decoder.yaml @@ -4,7 +4,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -name: sonar_basic_text_decoder +name: text_sonar_basic_decoder model_type: transformer_decoder model_arch: basic checkpoint: "https://dl.fbaipublicfiles.com/SONAR/sonar_text_decoder.pt" diff --git a/sonar/store/cards/text_sonar_basic_encoder.yaml b/sonar/cards/text_sonar_basic_encoder.yaml similarity index 95% rename from sonar/store/cards/text_sonar_basic_encoder.yaml rename to sonar/cards/text_sonar_basic_encoder.yaml index 113a563..435ab42 100644 --- a/sonar/store/cards/text_sonar_basic_encoder.yaml +++ b/sonar/cards/text_sonar_basic_encoder.yaml @@ -4,10 +4,9 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
-name: sonar_basic_text_encoder +name: text_sonar_basic_encoder model_type: transformer_encoder model_arch: basic -#checkpoint: "file://private/home/padqn/phd/tmodules/models/denoise_autoencode_reg2_100k/encoder.pt" checkpoint: "https://dl.fbaipublicfiles.com/SONAR/sonar_text_encoder.pt" tokenizer: "https://dl.fbaipublicfiles.com/SONAR/sentencepiece.source.256000.model" default_lang: eng_Latn diff --git a/sonar/store/cards/text_sonar_finetuned_decoder.yaml b/sonar/cards/text_sonar_finetuned_decoder.yaml similarity index 98% rename from sonar/store/cards/text_sonar_finetuned_decoder.yaml rename to sonar/cards/text_sonar_finetuned_decoder.yaml index 2e894e7..2e0a00a 100644 --- a/sonar/store/cards/text_sonar_finetuned_decoder.yaml +++ b/sonar/cards/text_sonar_finetuned_decoder.yaml @@ -5,7 +5,7 @@ # LICENSE file in the root directory of this source tree. -name: sonar_basic_text_decoder +name: text_sonar_finetuned_decoder model_type: transformer_decoder model_arch: basic checkpoint: "https://dl.fbaipublicfiles.com/SONAR/finetuned_decoder.pt" diff --git a/sonar/inference_pipelines/speech.py b/sonar/inference_pipelines/speech.py index 9c7c636..9166fe6 100644 --- a/sonar/inference_pipelines/speech.py +++ b/sonar/inference_pipelines/speech.py @@ -6,6 +6,7 @@ from abc import ABC, abstractmethod from dataclasses import dataclass +from functools import lru_cache from pathlib import Path from typing import List, Optional, Sequence, Union, cast @@ -20,14 +21,14 @@ from fairseq2.data.audio import AudioDecoder, WaveformToFbankConverter from fairseq2.data.data_pipeline import read_sequence from fairseq2.data.text import StrSplitter, TextTokenizer, read_text -from fairseq2.generation import SequenceToTextGenerator +from fairseq2.generation import BeamSearchSeq2SeqGenerator, SequenceToTextConverter from fairseq2.memory import MemoryBlock from fairseq2.models.sequence import SequenceBatch from fairseq2.models.transformer import TransformerDecoderModel from fairseq2.typing import 
DataType, Device from sonar.inference_pipelines.utils import extract_sequence_batch -from sonar.models import SonarEncoderModel +from sonar.models.encoder_model import SonarEncoderModel from sonar.models.sonar_speech.loader import load_sonar_speech_model from sonar.models.sonar_speech.model import SonarSpeechEncoderModel from sonar.models.sonar_text import load_sonar_text_decoder_model, load_sonar_tokenizer @@ -62,7 +63,6 @@ class SpeechInferenceParams: device: Device = CPU_DEVICE """The device on which to run inference.""" - # TODO: This will be soon auto-tuned. Right now hand-tuned for devfair. n_parallel: int = 4 """Number of parallel calls when running the pipeline.""" @@ -133,7 +133,7 @@ def prebuild_pipeline(self, context: SpeechInferenceParams) -> DataPipelineBuild # Batch every `context.batch_size` line pipeline_builder.bucket(bucket_size=context.batch_size) - collate = Collater(pad_idx=context.pad_idx, pad_to_multiple=2) + collate = Collater(pad_value=context.pad_idx, pad_to_multiple=2) pipeline_builder.map(collate, num_parallel_calls=context.n_parallel) @@ -247,15 +247,18 @@ def load_model_from_name( def prebuild_pipeline(self, context: SpeechInferenceParams) -> DataPipelineBuilder: assert context.target_lang is not None - generator = SequenceToTextGenerator( - self.model.to(context.device), + generator = BeamSearchSeq2SeqGenerator(self.model.to(context.device)) + converter = SequenceToTextConverter( + generator, self.tokenizer, + task="translation", target_lang=context.target_lang, ) def _do_generate(data: dict) -> List[StringLike]: batch = cast(SequenceBatch, data["fbank"]) - return generator(batch.seqs, batch.seq_lens) + texts, _ = converter.batch_convert(batch.seqs, batch.padding_mask) + return texts return ( self.audio_to_fbank_dp_builder.prebuild_pipeline(context) @@ -272,7 +275,6 @@ class SpeechModelPipelineInterface(torch.nn.Module): def __init__(self, fbank_dtype: DataType) -> None: super().__init__() - self.decode_audio = 
AudioDecoder(dtype=fbank_dtype) self.convert_to_fbank = WaveformToFbankConverter( num_mel_bins=80, waveform_scale=2**15, @@ -281,6 +283,12 @@ def __init__(self, fbank_dtype: DataType) -> None: device=self.device, dtype=fbank_dtype, ) + self._fbank_dtype = fbank_dtype + + @property + @lru_cache(maxsize=10) + def audio_decoder(self): + return AudioDecoder(dtype=self._fbank_dtype) def _decode_audio(self, inp: Union[str, torch.Tensor]) -> dict: if isinstance(inp, torch.Tensor): @@ -292,7 +300,7 @@ def _decode_audio(self, inp: Union[str, torch.Tensor]) -> dict: else: with Path(str(inp)).open("rb") as fb: block = MemoryBlock(fb.read()) - return self.decode_audio(block) # type: ignore + return self.audio_decoder(block) # type: ignore class SpeechToTextModelPipeline(SpeechModelPipelineInterface): @@ -329,6 +337,11 @@ def __init__( self.tokenizer = tokenizer self.model = SonarEncoderDecoderModel(encoder, decoder).to(device).eval() + # Only quantize the model in CUDA to bypass the error "LayerNormKernelImpl" not implemented for 'Half' + # in some CUDAs and torch versions + if fbank_dtype == torch.float16 and device.type == "cuda": + self.model = self.model.half() + @torch.inference_mode() def predict( self, @@ -339,15 +352,18 @@ def predict( pad_idx: int = 0, n_prefetched_batches: int = 2, ) -> List[str]: - generator = SequenceToTextGenerator( - self.model.to(self.device), + generator = BeamSearchSeq2SeqGenerator(self.model.to(self.device)) + converter = SequenceToTextConverter( + generator, self.tokenizer, + task="translation", target_lang=target_lang, ) def _do_generate(data: dict) -> List[StringLike]: batch = cast(SequenceBatch, data["fbank"]) - return generator(batch.seqs, batch.seq_lens) + texts, _ = converter.batch_convert(batch.seqs, batch.padding_mask) + return texts pipeline = ( read_sequence(input) @@ -355,7 +371,7 @@ def _do_generate(data: dict) -> List[StringLike]: .map(self.convert_to_fbank, num_parallel_calls=n_parallel) .bucket(bucket_size=batch_size) .map( - 
Collater(pad_idx=pad_idx, pad_to_multiple=2), + Collater(pad_value=pad_idx, pad_to_multiple=2), num_parallel_calls=n_parallel, ) .prefetch(n_prefetched_batches) @@ -394,6 +410,11 @@ def __init__( encoder = load_sonar_speech_model(encoder, device=device, progress=False) self.model = encoder.to(device).eval() + # Only quantize the model in CUDA to bypass the error "LayerNormKernelImpl" not implemented for 'Half' + # in some CUDAs and torch versions + if fbank_dtype == torch.float16 and device.type == "cuda": + self.model = self.model.half() + def build_predict_pipeline( self, input_pipeline, @@ -407,7 +428,7 @@ def build_predict_pipeline( .map(self.convert_to_fbank, num_parallel_calls=n_parallel) .bucket(bucket_size=batch_size) .map( - Collater(pad_idx=pad_idx, pad_to_multiple=2), + Collater(pad_value=pad_idx, pad_to_multiple=2), num_parallel_calls=n_parallel, ) .prefetch(n_prefetched_batches) diff --git a/sonar/inference_pipelines/text.py b/sonar/inference_pipelines/text.py index 8f82559..d7cb9e4 100644 --- a/sonar/inference_pipelines/text.py +++ b/sonar/inference_pipelines/text.py @@ -8,16 +8,16 @@ from typing import List, Sequence, Union import torch -from fairseq2.data import Collater +from fairseq2.data import Collater, StringLike from fairseq2.data.cstring import CString from fairseq2.data.data_pipeline import read_sequence from fairseq2.data.text import TextTokenizer, read_text -from fairseq2.generation import TextTranslator +from fairseq2.generation import BeamSearchSeq2SeqGenerator, TextTranslator from fairseq2.models.transformer import TransformerDecoderModel from fairseq2.typing import Device from sonar.inference_pipelines.utils import extract_sequence_batch -from sonar.models import SonarEncoderModel, SonarEncoderOutput +from sonar.models.encoder_model import SonarEncoderModel, SonarEncoderOutput from sonar.models.sonar_text import ( load_sonar_text_decoder_model, load_sonar_text_encoder_model, @@ -69,12 +69,18 @@ def predict( target_lang: str, 
batch_size: int = 5, ) -> List[str]: + generator = BeamSearchSeq2SeqGenerator(self.model) translator = TextTranslator( - model=self.model, + generator, tokenizer=self.tokenizer, source_lang=source_lang, target_lang=target_lang, ) + + def _do_translate(src_texts: List[StringLike]) -> List[StringLike]: + texts, _ = translator.batch_translate(src_texts) + return texts + pipeline = ( ( read_text(input) @@ -82,7 +88,7 @@ def predict( else read_sequence(input) ) .bucket(batch_size) - .map(translator) + .map(_do_translate) .and_return() ) diff --git a/sonar/inference_pipelines/utils.py b/sonar/inference_pipelines/utils.py index 9f9fb58..338a8b4 100644 --- a/sonar/inference_pipelines/utils.py +++ b/sonar/inference_pipelines/utils.py @@ -4,15 +4,16 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +from fairseq2.data import SequenceData from fairseq2.models.sequence import SequenceBatch +from fairseq2.nn.padding import get_seqs_and_padding_mask from fairseq2.typing import Device -def extract_sequence_batch(fbank: dict, device: Device) -> SequenceBatch: - assert "seqs" in fbank.keys() - assert "seq_lens" in fbank.keys() +def extract_sequence_batch(x: SequenceData, device: Device) -> SequenceBatch: + seqs, padding_mask = get_seqs_and_padding_mask(x) - return SequenceBatch( - seqs=fbank["seqs"].to(device=device), - seq_lens=fbank["seq_lens"].to(device=device), - ) + if padding_mask is not None: + padding_mask = padding_mask.to(device) + + return SequenceBatch(seqs.to(device), padding_mask) diff --git a/sonar/models/__init__.py b/sonar/models/__init__.py index 597ad31..2e41cd7 100644 --- a/sonar/models/__init__.py +++ b/sonar/models/__init__.py @@ -1,62 +1,5 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. 
# -# This source code is licensed under the license found in the +# This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. - -from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import Optional - -from fairseq2.models.sequence import SequenceBatch -from torch import Tensor -from torch.nn import Module - - -@dataclass -class SonarEncoderOutput: - """Dataclass for both speech and text SONAR encoder outputs""" - - encoded_seqs: Tensor - """Holds the output of the encoder - *Shape:* :math:`(N,S,M)`, where :math:`N` is the batch size, - :math:`S` is the sequence length, and :math:`M` is the - dimensionality of the model. - """ - - sentence_embeddings: Tensor - """ Pooled representation, derived from encoded_seqs by pooling in dim=1 - *Shape:* :math:`(N,M)`, where :math:`N` is the batch size, and :math:`M` is the - dimensionality of the model. - """ - - padding_mask: Optional[Tensor] - """Optional, the floating padding mask over sequences (-inf means masked element) - *Shape:* :math:`(N,S)`, where :math:`N` is the batch size, - :math:`S` is the sequence length. - """ - - -class SonarEncoderModel(ABC, Module): - """Abstract class for both speech and text SONAR encoder models""" - - model_dim: int - - def __init__(self, model_dim: int) -> None: - """ - - :param model_dim: - The dimensionality of the model. - """ - super().__init__() - - self.model_dim = model_dim - - @abstractmethod - def forward(self, batch: SequenceBatch) -> SonarEncoderOutput: - """ - :param batch: - The batch of sequences to process. - :returns: - SonarEncoderOutput - """ diff --git a/sonar/models/blaser/__init__.py b/sonar/models/blaser/__init__.py new file mode 100644 index 0000000..503245a --- /dev/null +++ b/sonar/models/blaser/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates +# All rights reserved. 
+# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +from sonar.models.blaser.builder import BlaserBuilder as BlaserBuilder +from sonar.models.blaser.builder import BlaserConfig as BlaserConfig +from sonar.models.blaser.builder import blaser_arch as blaser_arch +from sonar.models.blaser.builder import blaser_archs as blaser_archs +from sonar.models.blaser.builder import create_blaser_model as create_blaser_model +from sonar.models.blaser.loader import load_blaser_config as load_blaser_config +from sonar.models.blaser.loader import load_blaser_model as load_blaser_model diff --git a/sonar/models/blaser/builder.py b/sonar/models/blaser/builder.py index 2c27818..d3703ec 100644 --- a/sonar/models/blaser/builder.py +++ b/sonar/models/blaser/builder.py @@ -40,8 +40,10 @@ def __post__init__(self): blaser_archs = ArchitectureRegistry[BlaserConfig]("blaser") +blaser_arch = blaser_archs.decorator -@blaser_archs.marker("basic_ref") + +@blaser_arch("basic_ref") def _arch_blaser_basic_ref() -> BlaserConfig: return BlaserConfig( embedding_dim=1024, @@ -55,7 +57,7 @@ def _arch_blaser_basic_ref() -> BlaserConfig: ) -@blaser_archs.marker("basic_qe") +@blaser_arch("basic_qe") def _arch_blaser_basic_qe() -> BlaserConfig: return BlaserConfig( embedding_dim=1024, @@ -77,6 +79,7 @@ class BlaserBuilder: def __init__( self, config: BlaserConfig, + *, device: Optional[Device] = None, dtype: Optional[DataType] = None, ) -> None: @@ -100,6 +103,7 @@ def build_model(self) -> BlaserModel: def create_blaser_model( config: BlaserConfig, + *, device: Optional[Device] = None, dtype: Optional[DataType] = None, ) -> BlaserModel: @@ -111,4 +115,4 @@ def create_blaser_model( :param dtype: The data type of module parameters and buffers. 
""" - return BlaserBuilder(config, device, dtype).build_model() + return BlaserBuilder(config, device=device, dtype=dtype).build_model() diff --git a/sonar/models/blaser/loader.py b/sonar/models/blaser/loader.py index 53a6848..fe9f2cb 100644 --- a/sonar/models/blaser/loader.py +++ b/sonar/models/blaser/loader.py @@ -4,33 +4,32 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -from typing import Any, Mapping, final +from typing import Any, Mapping -from fairseq2.assets import download_manager -from fairseq2.models.utils.model_loader import ModelLoader +from fairseq2.assets import asset_store, download_manager +from fairseq2.models.utils import ConfigLoader, ModelLoader from sonar.models.blaser.builder import BlaserConfig, blaser_archs, create_blaser_model from sonar.models.blaser.model import BlaserModel -from sonar.store import asset_store -@final -class BlaserLoader(ModelLoader[BlaserModel, BlaserConfig]): - """Loads Blaser models""" +def convert_blaser_checkpoint( + checkpoint: Mapping[str, Any], config: BlaserConfig +) -> Mapping[str, Any]: + # Return directly if found fairseq2 attribute in state dict + if "model" in checkpoint.keys(): + return checkpoint + # Othewise (the old checkpoint format), move the whole state dict to the "model" section + return {"model": checkpoint} - def _upgrade_checkpoint( - self, checkpoint: Mapping[str, Any], config: BlaserConfig - ) -> Mapping[str, Any]: - # Return directly if found fairseq2 attribute in state dict - if "model" in checkpoint.keys(): - return checkpoint - # Othewise (the old checkpoint format), move the whole state dict to the "model" section - return {"model": checkpoint} +load_blaser_config = ConfigLoader[BlaserConfig](asset_store, blaser_archs) -load_blaser_model = BlaserLoader( +load_blaser_model = ModelLoader[BlaserModel, BlaserConfig]( asset_store, download_manager, + load_blaser_config, create_blaser_model, - blaser_archs, + 
convert_blaser_checkpoint, + restrict_checkpoints=False, ) diff --git a/sonar/models/encoder_model.py b/sonar/models/encoder_model.py new file mode 100644 index 0000000..eeeb85f --- /dev/null +++ b/sonar/models/encoder_model.py @@ -0,0 +1,63 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Optional + +from fairseq2.models.sequence import SequenceBatch +from fairseq2.nn.padding import PaddingMask +from torch import Tensor +from torch.nn import Module + + +@dataclass +class SonarEncoderOutput: + """Dataclass for both speech and text SONAR encoder outputs""" + + encoded_seqs: Tensor + """Holds the output of the encoder + *Shape:* :math:`(N,S,M)`, where :math:`N` is the batch size, + :math:`S` is the sequence length, and :math:`M` is the + dimensionality of the model. + """ + + sentence_embeddings: Tensor + """ Pooled representation, derived from encoded_seqs by pooling in dim=1 + *Shape:* :math:`(N,M)`, where :math:`N` is the batch size, and :math:`M` is the + dimensionality of the model. + """ + + padding_mask: Optional[PaddingMask] + """Optional, the floating padding mask over sequences (-inf means masked element) + *Shape:* :math:`(N,S)`, where :math:`N` is the batch size, + :math:`S` is the sequence length. + """ + + +class SonarEncoderModel(ABC, Module): + """Abstract class for both speech and text SONAR encoder models""" + + model_dim: int + + def __init__(self, model_dim: int) -> None: + """ + + :param model_dim: + The dimensionality of the model. + """ + super().__init__() + + self.model_dim = model_dim + + @abstractmethod + def forward(self, batch: SequenceBatch) -> SonarEncoderOutput: + """ + :param batch: + The batch of sequences to process. 
+ :returns: + SonarEncoderOutput + """ diff --git a/sonar/models/laser2_text/__init__.py b/sonar/models/laser2_text/__init__.py index a76d860..143a17a 100644 --- a/sonar/models/laser2_text/__init__.py +++ b/sonar/models/laser2_text/__init__.py @@ -9,6 +9,7 @@ from sonar.models.laser2_text.builder import create_laser2_model as create_laser2_model from sonar.models.laser2_text.builder import laser2_arch as laser2_arch from sonar.models.laser2_text.builder import laser2_archs as laser2_archs +from sonar.models.laser2_text.loader import load_laser2_config as load_laser2_config from sonar.models.laser2_text.loader import load_laser2_model as load_laser2_model from sonar.models.laser2_text.loader import ( load_laser2_tokenizer as load_laser2_tokenizer, diff --git a/sonar/models/laser2_text/builder.py b/sonar/models/laser2_text/builder.py index ced9cbb..6a9e3f5 100644 --- a/sonar/models/laser2_text/builder.py +++ b/sonar/models/laser2_text/builder.py @@ -28,7 +28,7 @@ class Laser2Config: laser2_archs = ArchitectureRegistry[Laser2Config]("lstm") -laser2_arch = laser2_archs.marker +laser2_arch = laser2_archs.decorator @laser2_arch("laser2") @@ -52,6 +52,7 @@ class Laser2Builder: def __init__( self, config: Laser2Config, + *, device: Optional[Device] = None, dtype: Optional[DataType] = None, ) -> None: @@ -83,6 +84,7 @@ def build_model(self) -> LaserLstmEncoder: def create_laser2_model( config: Laser2Config, + *, device: Optional[Device] = None, dtype: Optional[DataType] = None, ) -> LaserLstmEncoder: @@ -94,4 +96,4 @@ def create_laser2_model( :param dtype: The data type of module parameters and buffers. 
""" - return Laser2Builder(config, device, dtype).build_model() + return Laser2Builder(config, device=device, dtype=dtype).build_model() diff --git a/sonar/models/laser2_text/loader.py b/sonar/models/laser2_text/loader.py index b386e27..82b4d22 100644 --- a/sonar/models/laser2_text/loader.py +++ b/sonar/models/laser2_text/loader.py @@ -4,9 +4,8 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. - -from fairseq2.assets import download_manager -from fairseq2.models.utils.model_loader import ModelLoader +from fairseq2.assets import asset_store, download_manager +from fairseq2.models.utils import ConfigLoader, ModelLoader, TokenizerLoader from sonar.models.laser2_text.builder import ( Laser2Config, @@ -15,50 +14,17 @@ ) from sonar.models.laser2_text.tokenizer import Laser2Tokenizer from sonar.nn.laser_lstm_encoder import LaserLstmEncoder -from sonar.store import asset_store + +load_laser2_config = ConfigLoader[Laser2Config](asset_store, laser2_archs) load_laser2_model = ModelLoader[LaserLstmEncoder, Laser2Config]( - asset_store, download_manager, create_laser2_model, laser2_archs + asset_store, + download_manager, + load_laser2_config, + create_laser2_model, + restrict_checkpoints=False, ) - -from fairseq2.assets import AssetDownloadManager, AssetStore, download_manager - - -class Laser2TokenizerLoader: - """Loads tokenizers of Laser2 models.""" - - def __init__( - self, - asset_store: AssetStore = asset_store, - download_manager: AssetDownloadManager = download_manager, - ) -> None: - """ - :param asset_store: - The asset store to retrieve the model information. - :param download_manager: - The download manager to use. - """ - self.asset_store = asset_store - self.download_manager = download_manager - - def __call__( - self, model_name: str, force: bool = False, progress: bool = False - ) -> Laser2Tokenizer: - """ - :param name: - The name of the model. 
- :param force: - If ``True``, downloads the tokenizer even if it is already in cache. - :param progress: - If ``True``, displays a progress bar to stderr. - """ - card = self.asset_store.retrieve_card(model_name) - uri = card.field("tokenizer").as_uri() - pathname = self.download_manager.download_tokenizer( - uri, card.name, force=force, progress=progress - ) - return Laser2Tokenizer(pathname) - - -load_laser2_tokenizer = Laser2TokenizerLoader() +load_laser2_tokenizer = TokenizerLoader[Laser2Tokenizer]( + asset_store, download_manager, Laser2Tokenizer +) diff --git a/sonar/models/laser2_text/tokenizer.py b/sonar/models/laser2_text/tokenizer.py index 81845af..0d10031 100644 --- a/sonar/models/laser2_text/tokenizer.py +++ b/sonar/models/laser2_text/tokenizer.py @@ -13,25 +13,43 @@ SentencePieceModel, TextTokenDecoder, TextTokenEncoder, + TextTokenizer, ) -from fairseq2.data.text.sentencepiece import vocabulary_from_sentencepiece +from fairseq2.data.text.sentencepiece import vocab_info_from_sentencepiece from fairseq2.data.typing import PathLike, StringLike -from fairseq2.typing import Device +from fairseq2.typing import Device, finaloverride +from torch import Tensor +from typing_extensions import NoReturn @final -class Laser2Encoder: +class Laser2Encoder(TextTokenEncoder): def __init__(self, spm_encoder: SentencePieceEncoder) -> None: - super().__init__() self.spm_encoder: SentencePieceEncoder = spm_encoder + @finaloverride def __call__(self, sentence: StringLike) -> torch.Tensor: out = self.spm_encoder(sentence) + return torch.where(out >= 3, out + 4, out) + @finaloverride + def encode_as_tokens(self, text: StringLike) -> NoReturn: + raise RuntimeError("not implemented!") + + @property + @finaloverride + def prefix_indices(self) -> Optional[Tensor]: + return self.spm_encoder.prefix_indices + + @property + @finaloverride + def suffix_indices(self) -> Optional[Tensor]: + return self.spm_encoder.suffix_indices + @final -class Laser2Tokenizer: +class 
Laser2Tokenizer(TextTokenizer): """Represents the tokenizer used by S2T Transformer models.""" model: SentencePieceModel @@ -45,10 +63,20 @@ def __init__( The pathname of the SentencePiece model file. """ self.model = SentencePieceModel(pathname, [""]) - self.vocab_info = vocabulary_from_sentencepiece(self.model) + vocab_info = vocab_info_from_sentencepiece(self.model) + + super().__init__(vocab_info) + + @finaloverride def create_encoder( - self, device: Optional[Device] = None, pin_memory: bool = False + self, + *, + task: Optional[str] = None, + lang: Optional[str] = None, + mode: Optional[str] = None, + device: Optional[Device] = None, + pin_memory: bool = False, ) -> Laser2Encoder: return Laser2Encoder( spm_encoder=SentencePieceEncoder( @@ -59,5 +87,12 @@ def create_encoder( ) ) + @finaloverride + def create_raw_encoder( + self, *, device: Optional[Device] = None, pin_memory: bool = False + ) -> TextTokenEncoder: + return SentencePieceEncoder(self.model, device=device, pin_memory=pin_memory) + + @finaloverride def create_decoder(self) -> TextTokenDecoder: return SentencePieceDecoder(self.model) diff --git a/sonar/models/sonar_speech/__init__.py b/sonar/models/sonar_speech/__init__.py index 74ab218..c2e266e 100644 --- a/sonar/models/sonar_speech/__init__.py +++ b/sonar/models/sonar_speech/__init__.py @@ -4,7 +4,6 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
- from sonar.models.sonar_speech.builder import ( SonarSpeechEncoderBuilder as SonarSpeechEncoderBuilder, ) @@ -16,6 +15,9 @@ ) from sonar.models.sonar_speech.builder import sonar_speech_arch as sonar_speech_arch from sonar.models.sonar_speech.builder import sonar_speech_archs as sonar_speech_archs +from sonar.models.sonar_speech.loader import ( + load_sonar_speech_config as load_sonar_speech_config, +) from sonar.models.sonar_speech.loader import ( load_sonar_speech_model as load_sonar_speech_model, ) diff --git a/sonar/models/sonar_speech/builder.py b/sonar/models/sonar_speech/builder.py index 6e1e581..5c0c0a3 100644 --- a/sonar/models/sonar_speech/builder.py +++ b/sonar/models/sonar_speech/builder.py @@ -15,7 +15,7 @@ from fairseq2.models.w2vbert import w2vbert_archs from fairseq2.models.wav2vec2 import Wav2Vec2EncoderBuilder, Wav2Vec2EncoderConfig from fairseq2.nn import Linear -from fairseq2.nn.embedding import Embedding +from fairseq2.nn.embedding import Embedding, StandardEmbedding, init_scaled_embedding from fairseq2.nn.normalization import LayerNorm from fairseq2.nn.position_encoder import PositionEncoder, SinusoidalPositionEncoder from fairseq2.nn.transformer import TransformerNormOrder @@ -29,7 +29,7 @@ TransformerDecoderLayer, ) from fairseq2.nn.transformer.ffn import FeedForwardNetwork, StandardFeedForwardNetwork -from fairseq2.nn.transformer.layer_norm import create_default_layer_norm +from fairseq2.nn.transformer.layer_norm import create_standard_layer_norm from fairseq2.nn.transformer.multihead_attention import ( MultiheadAttention, StandardMultiheadAttention, @@ -79,7 +79,8 @@ class SonarSpeechEncoderConfig: sonar_speech_archs = ArchitectureRegistry[SonarSpeechEncoderConfig]("sonar_speech") -sonar_speech_arch = sonar_speech_archs.marker + +sonar_speech_arch = sonar_speech_archs.decorator @sonar_speech_arch("english") @@ -137,6 +138,7 @@ def __init__( self, config: SonarSpeechEncoderConfig, w2v2_encoder_builder: Wav2Vec2EncoderBuilder, + *, device: 
Optional[Device] = None, dtype: Optional[DataType] = None, ) -> None: @@ -196,11 +198,11 @@ def build_pos_encoder(self) -> PositionEncoder: def build_embedding(self) -> Embedding: """Build an embedding table.""" - return Embedding( + return StandardEmbedding( num_embeddings=self.config.w2v2_encoder_config.model_dim, embedding_dim=self.config.model_dim, pad_idx=self.config.pad_idx, - scaled=True, + init_fn=init_scaled_embedding, ) def build_decoder(self) -> TransformerDecoder: @@ -240,6 +242,7 @@ def build_ffn(self) -> FeedForwardNetwork: return StandardFeedForwardNetwork( self.config.model_dim, self.config.ffn_inner_dim, + bias=True, norm_order=self.config.decoder_norm_order, ) @@ -248,7 +251,7 @@ def build_w2v2_final_layer_norm(self) -> Optional[LayerNorm]: if not self.config.w2v2_encoder_config.use_conformer: return None - return create_default_layer_norm( + return create_standard_layer_norm( self.config.w2v2_encoder_config.model_dim, ) @@ -263,6 +266,7 @@ def build_projection_out(self) -> Linear: def create_sonar_speech_encoder_model( config: SonarSpeechEncoderConfig, + *, device: Optional[Device] = None, dtype: Optional[DataType] = None, ) -> SonarSpeechEncoderModel: @@ -276,11 +280,11 @@ def create_sonar_speech_encoder_model( The data type of module parameters and buffers. """ w2v2_encoder_builder = Wav2Vec2EncoderBuilder( - config.w2v2_encoder_config, device, dtype + config.w2v2_encoder_config, device=device, dtype=dtype ) sonar_builder = SonarSpeechEncoderBuilder( - config, w2v2_encoder_builder, device, dtype + config, w2v2_encoder_builder, device=device, dtype=dtype ) return sonar_builder.build_model() diff --git a/sonar/models/sonar_speech/loader.py b/sonar/models/sonar_speech/loader.py index 580cef2..9eb8a28 100644 --- a/sonar/models/sonar_speech/loader.py +++ b/sonar/models/sonar_speech/loader.py @@ -4,12 +4,11 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
-from typing import Any, Dict, Mapping, final +from typing import Any, Mapping -from fairseq2.assets import download_manager -from fairseq2.models.utils.checkpoint_loader import upgrade_fairseq_checkpoint -from fairseq2.models.utils.model_loader import ModelConfigLoader, ModelLoader -from overrides import override as finaloverride +from fairseq2.assets import asset_store, download_manager +from fairseq2.models.utils import ConfigLoader, ModelLoader +from fairseq2.models.utils.checkpoint import convert_fairseq_checkpoint from sonar.models.sonar_speech.builder import ( SonarSpeechEncoderConfig, @@ -17,91 +16,81 @@ sonar_speech_archs, ) from sonar.models.sonar_speech.model import SonarSpeechEncoderModel -from sonar.store import asset_store -@final -class SonarSpeechEncoderLoader( - ModelLoader[SonarSpeechEncoderModel, SonarSpeechEncoderConfig] -): - """Loads sonar models.""" +def convert_sonar_speech_checkpoint( + checkpoint: Mapping[str, Any], config: SonarSpeechEncoderConfig +) -> Mapping[str, Any]: + state_dict = checkpoint["model"] - @finaloverride - def _upgrade_checkpoint( - self, checkpoint: Mapping[str, Any], config: SonarSpeechEncoderConfig - ) -> Mapping[str, Any]: - state_dict = checkpoint["model"] - - # Check if we have a fairseq2 checkpoint. - if "encoder_frontend.model_dim_proj" in state_dict: - return checkpoint + # Check if we have a fairseq2 checkpoint. 
+ if "encoder_frontend.model_dim_proj" in state_dict: + return checkpoint + if "encoder.w2v_model.mask_emb" in state_dict: del state_dict["encoder.w2v_model.mask_emb"] - key_map = self._fairseq_key_map(config) - - return upgrade_fairseq_checkpoint(checkpoint, key_map) - - @staticmethod - def _fairseq_key_map(config: SonarSpeechEncoderConfig) -> Dict[str, str]: - key_map = { - # fmt: off - # encoder - r"^encoder.w2v_model.layer_norm\.": r"encoder_frontend.post_extract_layer_norm.", - r"^encoder.w2v_model.post_extract_proj\.": r"encoder_frontend.model_dim_proj.", - r"^encoder.w2v_model.encoder\.pos_conv\.0\.": r"encoder_frontend.pos_encoder.conv.", - r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.conv_module\.batch_norm\.": r"encoder.layers.\1.conv.batch_norm.", - r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.conv_module\.depthwise_conv\.": r"encoder.layers.\1.conv.depthwise_conv.", - r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.conv_module\.layer_norm\.": r"encoder.layers.\1.conv_layer_norm.", - r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.conv_module\.pointwise_conv1\.": r"encoder.layers.\1.conv.pointwise_conv1.", - r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.conv_module\.pointwise_conv2\.": r"encoder.layers.\1.conv.pointwise_conv2.", - r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.ffn(1|2)\.layer_norm\.": r"encoder.layers.\1.ffn\2_layer_norm.", - r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.ffn(1|2)\.w_1\.": r"encoder.layers.\1.ffn\2.inner_proj.", - r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.ffn(1|2)\.w_2\.": r"encoder.layers.\1.ffn\2.output_proj.", - r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.self_attn_layer_norm\.": r"encoder.layers.\1.self_attn_layer_norm.", - r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.self_attn\.linear_q\.": r"encoder.layers.\1.self_attn.q_proj.", - r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.self_attn\.linear_k\.": r"encoder.layers.\1.self_attn.k_proj.", - 
r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.self_attn\.linear_v\.": r"encoder.layers.\1.self_attn.v_proj.", - r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.self_attn\.linear_out\.": r"encoder.layers.\1.self_attn.output_proj.", - r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.self_attn\.linear_pos\.": r"encoder.layers.\1.self_attn.sdpa.r_proj.", - r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.self_attn\.pos_bias_u": r"encoder.layers.\1.self_attn.sdpa.u_bias", - r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.self_attn\.pos_bias_v": r"encoder.layers.\1.self_attn.sdpa.v_bias", - r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.final_layer_norm\.": r"encoder.layers.\1.layer_norm.", - r"^encoder.w2v_model.encoder\.layer_norm\.": r"encoder.layer_norm.", - - r"^decoder\.embed_tokens\.": r"encoder_pooler.decoder_frontend.embed.", - r"^decoder\.layers\.([0-9]+)\.self_attn_layer_norm\.": r"encoder_pooler.decoder.layers.\1.self_attn_layer_norm.", - r"^decoder\.layers\.([0-9]+)\.self_attn\.out_proj\.": r"encoder_pooler.decoder.layers.\1.self_attn.output_proj.", - r"^decoder\.layers\.([0-9]+)\.self_attn\.": r"encoder_pooler.decoder.layers.\1.self_attn.", - r"^decoder\.layers\.([0-9]+)\.encoder_attn_layer_norm\.": r"encoder_pooler.decoder.layers.\1.encoder_decoder_attn_layer_norm.", - r"^decoder\.layers\.([0-9]+)\.encoder_attn\.out_proj\.": r"encoder_pooler.decoder.layers.\1.encoder_decoder_attn.output_proj.", - r"^decoder\.layers\.([0-9]+)\.encoder_attn\.": r"encoder_pooler.decoder.layers.\1.encoder_decoder_attn.", - r"^decoder\.layers\.([0-9]+)\.fc1\.": r"encoder_pooler.decoder.layers.\1.ffn.inner_proj.", - r"^decoder\.layers\.([0-9]+)\.fc2\.": r"encoder_pooler.decoder.layers.\1.ffn.output_proj.", - r"^decoder\.layers\.([0-9]+)\.final_layer_norm\.": r"encoder_pooler.decoder.layers.\1.ffn_layer_norm.", - - r"^decoder\.embed_out": r"encoder_pooler.projection_out.weight", - # fmt: on - } - - # In normal circumstances, we should never encounter a `LayerNorm` when - # 
`use_conformer` is `True`. Unfortunately, the w2v-BERT pretraining in - # fairseq was accidentally run with a pre-LN encoder, and ended up with - # a redundant `LayerNorm` right after the Conformer blocks. We mitigate - # that issue here by moving that `LayerNorm` to the sonar block. - if config.w2v2_encoder_config.use_conformer: - key_map.update( - {r"^encoder.w2v_model.encoder\.layer_norm\.": r"layer_norm."} - ) - - return key_map - - -load_sonar_speech_model = SonarSpeechEncoderLoader( - asset_store, download_manager, create_sonar_speech_encoder_model, sonar_speech_archs + key_map = { + # fmt: off + # encoder + r"^encoder.w2v_model.layer_norm\.": r"encoder_frontend.post_extract_layer_norm.", + r"^encoder.w2v_model.post_extract_proj\.": r"encoder_frontend.model_dim_proj.", + r"^encoder.w2v_model.encoder\.pos_conv\.0\.": r"encoder_frontend.pos_encoder.conv.", + r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.conv_module\.batch_norm\.": r"encoder.layers.\1.conv.batch_norm.", + r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.conv_module\.depthwise_conv\.": r"encoder.layers.\1.conv.depthwise_conv.", + r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.conv_module\.layer_norm\.": r"encoder.layers.\1.conv_layer_norm.", + r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.conv_module\.pointwise_conv1\.": r"encoder.layers.\1.conv.pointwise_conv1.", + r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.conv_module\.pointwise_conv2\.": r"encoder.layers.\1.conv.pointwise_conv2.", + r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.ffn(1|2)\.layer_norm\.": r"encoder.layers.\1.ffn\2_layer_norm.", + r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.ffn(1|2)\.w_1\.": r"encoder.layers.\1.ffn\2.inner_proj.", + r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.ffn(1|2)\.w_2\.": r"encoder.layers.\1.ffn\2.output_proj.", + r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.self_attn_layer_norm\.": r"encoder.layers.\1.self_attn_layer_norm.", + 
r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.self_attn\.linear_q\.": r"encoder.layers.\1.self_attn.q_proj.", + r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.self_attn\.linear_k\.": r"encoder.layers.\1.self_attn.k_proj.", + r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.self_attn\.linear_v\.": r"encoder.layers.\1.self_attn.v_proj.", + r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.self_attn\.linear_out\.": r"encoder.layers.\1.self_attn.output_proj.", + r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.self_attn\.linear_pos\.": r"encoder.layers.\1.self_attn.sdpa.r_proj.", + r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.self_attn\.pos_bias_u": r"encoder.layers.\1.self_attn.sdpa.u_bias", + r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.self_attn\.pos_bias_v": r"encoder.layers.\1.self_attn.sdpa.v_bias", + r"^encoder.w2v_model.encoder\.layers\.([0-9]+)\.final_layer_norm\.": r"encoder.layers.\1.layer_norm.", + r"^encoder.w2v_model.encoder\.layer_norm\.": r"encoder.layer_norm.", + + r"^decoder\.embed_tokens\.": r"encoder_pooler.decoder_frontend.embed.", + r"^decoder\.layers\.([0-9]+)\.self_attn_layer_norm\.": r"encoder_pooler.decoder.layers.\1.self_attn_layer_norm.", + r"^decoder\.layers\.([0-9]+)\.self_attn\.out_proj\.": r"encoder_pooler.decoder.layers.\1.self_attn.output_proj.", + r"^decoder\.layers\.([0-9]+)\.self_attn\.": r"encoder_pooler.decoder.layers.\1.self_attn.", + r"^decoder\.layers\.([0-9]+)\.encoder_attn_layer_norm\.": r"encoder_pooler.decoder.layers.\1.encoder_decoder_attn_layer_norm.", + r"^decoder\.layers\.([0-9]+)\.encoder_attn\.out_proj\.": r"encoder_pooler.decoder.layers.\1.encoder_decoder_attn.output_proj.", + r"^decoder\.layers\.([0-9]+)\.encoder_attn\.": r"encoder_pooler.decoder.layers.\1.encoder_decoder_attn.", + r"^decoder\.layers\.([0-9]+)\.fc1\.": r"encoder_pooler.decoder.layers.\1.ffn.inner_proj.", + r"^decoder\.layers\.([0-9]+)\.fc2\.": r"encoder_pooler.decoder.layers.\1.ffn.output_proj.", + 
r"^decoder\.layers\.([0-9]+)\.final_layer_norm\.": r"encoder_pooler.decoder.layers.\1.ffn_layer_norm.", + + r"^decoder\.embed_out": r"encoder_pooler.projection_out.weight", + # fmt: on + } + + # In normal circumstances, we should never encounter a `LayerNorm` when + # `use_conformer` is `True`. Unfortunately, the w2v-BERT pretraining in + # fairseq was accidentally run with a pre-LN encoder, and ended up with + # a redundant `LayerNorm` right after the Conformer blocks. We mitigate + # that issue here by moving that `LayerNorm` to the sonar block. + if config.w2v2_encoder_config.use_conformer: + key_map.update({r"^encoder.w2v_model.encoder\.layer_norm\.": r"layer_norm."}) + + return convert_fairseq_checkpoint(checkpoint, key_map) + + +load_sonar_speech_config = ConfigLoader[SonarSpeechEncoderConfig]( + asset_store, sonar_speech_archs ) - -load_sonar_speech_config = ModelConfigLoader[SonarSpeechEncoderConfig]( - asset_store, sonar_speech_archs +load_sonar_speech_model = ModelLoader[ + SonarSpeechEncoderModel, SonarSpeechEncoderConfig +]( + asset_store, + download_manager, + load_sonar_speech_config, + create_sonar_speech_encoder_model, + convert_sonar_speech_checkpoint, ) diff --git a/sonar/models/sonar_speech/model.py b/sonar/models/sonar_speech/model.py index c97547b..c62659b 100644 --- a/sonar/models/sonar_speech/model.py +++ b/sonar/models/sonar_speech/model.py @@ -6,15 +6,15 @@ from typing import Optional, Tuple -import torch from fairseq2.models.sequence import SequenceBatch -from fairseq2.models.transformer.frontend import TransformerFrontend +from fairseq2.models.transformer import TransformerFrontend from fairseq2.nn.normalization import LayerNorm +from fairseq2.nn.padding import PaddingMask from fairseq2.nn.transformer import TransformerEncoder from torch import Tensor from torch.nn import Dropout -from sonar.models import SonarEncoderModel, SonarEncoderOutput +from sonar.models.encoder_model import SonarEncoderModel, SonarEncoderOutput from 
sonar.nn.encoder_pooler import EncoderOutputPooler @@ -57,7 +57,7 @@ def __init__( self.encoder_pooler = encoder_pooler def forward(self, batch: SequenceBatch) -> SonarEncoderOutput: - seqs, padding_mask = self.encoder_frontend(batch.seqs, batch.seq_lens) + seqs, padding_mask = self.encoder_frontend(batch.seqs, batch.padding_mask) encoder_output, encoder_padding_mask = self.encoder(seqs, padding_mask) # This is the workaround for the pre-LN issue of redundant LayerNorm. @@ -77,9 +77,9 @@ def forward(self, batch: SequenceBatch) -> SonarEncoderOutput: ) def encode( - self, seqs: torch.Tensor, seq_lens: Optional[torch.Tensor] + self, seqs: Tensor, padding_mask: Optional[PaddingMask] ) -> Tuple[Tensor, Optional[Tensor]]: - sonar_output_encoder = self.encoder(seqs, seq_lens) + sonar_output_encoder = self.encoder(seqs, padding_mask) return ( sonar_output_encoder.sentence_embeddings.unsqueeze(1), None, diff --git a/sonar/models/sonar_text/__init__.py b/sonar/models/sonar_text/__init__.py index a61dcd3..88a86b8 100644 --- a/sonar/models/sonar_text/__init__.py +++ b/sonar/models/sonar_text/__init__.py @@ -35,13 +35,13 @@ sonar_text_encoder_archs as sonar_text_encoder_archs, ) from sonar.models.sonar_text.loader import ( - SonarTextDecoderLoader as SonarTextDecoderLoader, + load_sonar_text_decoder_config as load_sonar_text_decoder_config, ) from sonar.models.sonar_text.loader import ( - SonarTextEncoderLoader as SonarTextEncoderLoader, + load_sonar_text_decoder_model as load_sonar_text_decoder_model, ) from sonar.models.sonar_text.loader import ( - load_sonar_text_decoder_model as load_sonar_text_decoder_model, + load_sonar_text_encoder_config as load_sonar_text_encoder_config, ) from sonar.models.sonar_text.loader import ( load_sonar_text_encoder_model as load_sonar_text_encoder_model, diff --git a/sonar/models/sonar_text/builder.py b/sonar/models/sonar_text/builder.py index 6ef2a26..daaef63 100644 --- a/sonar/models/sonar_text/builder.py +++ 
b/sonar/models/sonar_text/builder.py @@ -8,13 +8,14 @@ from typing import Optional import torch.nn +from fairseq2.data import VocabularyInfo from fairseq2.models.transformer import ( TransformerDecoderModel, TransformerEmbeddingFrontend, TransformerFrontend, ) from fairseq2.models.utils.arch_registry import ArchitectureRegistry -from fairseq2.nn.embedding import Embedding +from fairseq2.nn.embedding import StandardEmbedding, init_scaled_embedding from fairseq2.nn.normalization import StandardLayerNorm from fairseq2.nn.position_encoder import ( LearnedPositionEncoder, @@ -49,15 +50,13 @@ class SonarTextEncoderConfig: model_dim: int """The dimensionality of the model.""" - vocabulary_size: int - """The size of the vocabulary.""" - max_seq_len: int """The expected maximum sequence length. Corresponds to `max_source_positions` in fairseq """ - pad_idx: int - """The index of the pad symbol in the vocabulary.""" + + vocab_info: VocabularyInfo + """the vocabulary information.""" num_encoder_layers: int """The number of Transformer encoder layers.""" @@ -115,15 +114,17 @@ class SonarTextEncoderConfig: sonar_text_encoder_archs = ArchitectureRegistry[SonarTextEncoderConfig]( "transformer_encoder" ) -sonar_text_encoder_arch = sonar_text_encoder_archs.marker + +sonar_text_encoder_arch = sonar_text_encoder_archs.decorator @sonar_text_encoder_arch("basic") def encoder_basic() -> SonarTextEncoderConfig: return SonarTextEncoderConfig( model_dim=1024, - pad_idx=1, - vocabulary_size=256206, + vocab_info=VocabularyInfo( + size=256206, unk_idx=1, bos_idx=2, eos_idx=3, pad_idx=1 + ), learned_pos=False, no_scale_embedding=False, emb_dropout_p=0.1, @@ -152,6 +153,7 @@ class SonarTextEncoderBuilder: def __init__( self, config: SonarTextEncoderConfig, + *, device: Optional[Device] = None, dtype: Optional[DataType] = None, ) -> None: @@ -167,7 +169,9 @@ def __init__( self.device = device self.dtype = dtype if self.config._from_fairseq: - self.config.max_seq_len += self.config.pad_idx + 1 + 
assert self.config.vocab_info.pad_idx is not None + + self.config.max_seq_len += self.config.vocab_info.pad_idx + 1 self.transformer_normalize_order = ( TransformerNormOrder.PRE @@ -177,11 +181,11 @@ def __init__( def build_model(self) -> SonarTextTransformerEncoderModel: """Build a SonarTextTransformerEncoderModel model.""" - embed = Embedding( - num_embeddings=self.config.vocabulary_size, + embed = StandardEmbedding( + num_embeddings=self.config.vocab_info.size, embedding_dim=self.config.model_dim, - pad_idx=self.config.pad_idx, - scaled=True, + pad_idx=self.config.vocab_info.pad_idx, + init_fn=init_scaled_embedding, ) pos_encoder: Optional[PositionEncoder] = None @@ -195,7 +199,7 @@ def build_model(self) -> SonarTextTransformerEncoderModel: pos_encoder = SinusoidalPositionEncoder( encoding_dim=self.config.model_dim, max_seq_len=self.config.max_seq_len, - _legacy_pad_idx=self.config.pad_idx, + _legacy_pad_idx=self.config.vocab_info.pad_idx, ) embedding_frontend = TransformerEmbeddingFrontend( @@ -215,7 +219,7 @@ def build_model(self) -> SonarTextTransformerEncoderModel: model = SonarTextTransformerEncoderModel( encoder_frontend=embedding_frontend, encoder=encoder, - layer_norm=StandardLayerNorm(self.config.model_dim), + layer_norm=StandardLayerNorm(self.config.model_dim, bias=True), pooling=getattr(Pooling, self.config.pooling.upper()), ) return model.to(device=self.device, dtype=self.dtype) @@ -242,6 +246,7 @@ def build_ffn(self) -> FeedForwardNetwork: return StandardFeedForwardNetwork( self.config.model_dim, self.config.ffn_inner_dim, + bias=True, inner_activation=getattr(torch.nn, self.config.activation_fn)(), inner_dropout_p=self.config.activation_dropout_p, norm_order=self.transformer_normalize_order, @@ -250,6 +255,7 @@ def build_ffn(self) -> FeedForwardNetwork: def create_sonar_text_encoder_model( config: SonarTextEncoderConfig, + *, device: Optional[Device] = None, dtype: Optional[DataType] = None, ) -> SonarTextTransformerEncoderModel: @@ -262,7 +268,7 
@@ def create_sonar_text_encoder_model( :param dtype: The data type of module parameters and buffers. """ - return SonarTextEncoderBuilder(config, device, dtype).build_model() + return SonarTextEncoderBuilder(config, device=device, dtype=dtype).build_model() @dataclass @@ -272,17 +278,14 @@ class SonarTextDecoderConfig: model_dim: int """The dimensionality of the model.""" - vocabulary_size: int - """The size of the vocabulary.""" - - pad_idx: int - """The index of the pad symbol in the vocabulary.""" - max_seq_len: int """The expected maximum sequence length. Corresponds to `max_source_positions` in fairseq """ + vocab_info: VocabularyInfo + """The vocabulary information.""" + activation_fn: str """ activation function to use in FeedForward network of Transformers; None corresponds to ReLu""" @@ -330,16 +333,18 @@ class SonarTextDecoderConfig: sonar_text_decoder_archs = ArchitectureRegistry[SonarTextDecoderConfig]( "transformer_decoder" ) -sonar_text_decoder_arch = sonar_text_decoder_archs.marker + +sonar_text_decoder_arch = sonar_text_decoder_archs.decorator @sonar_text_decoder_arch("basic") def decoder_basic() -> SonarTextDecoderConfig: return SonarTextDecoderConfig( model_dim=1024, - pad_idx=1, - vocabulary_size=256206, max_seq_len=512, + vocab_info=VocabularyInfo( + size=256206, unk_idx=1, bos_idx=2, eos_idx=3, pad_idx=1 + ), learned_pos=False, no_scale_embedding=False, emb_dropout_p=0.1, @@ -365,6 +370,7 @@ class SonarTextDecoderBuilder: def __init__( self, config: SonarTextDecoderConfig, + *, device: Optional[Device] = None, dtype: Optional[DataType] = None, ) -> None: @@ -390,16 +396,16 @@ def build_decoder_frontend(self) -> TransformerFrontend: """ decoder frontend is very similar to encoder one """ - embed = Embedding( - num_embeddings=self.config.vocabulary_size, + embed = StandardEmbedding( + num_embeddings=self.config.vocab_info.size, embedding_dim=self.config.model_dim, - pad_idx=self.config.pad_idx, - scaled=True, + 
pad_idx=self.config.vocab_info.pad_idx, + init_fn=init_scaled_embedding, ) pos_encoder = SinusoidalPositionEncoder( encoding_dim=self.config.model_dim, max_seq_len=self.config.max_seq_len, - _legacy_pad_idx=self.config.pad_idx, + _legacy_pad_idx=self.config.vocab_info.pad_idx, ) return TransformerEmbeddingFrontend( embed, @@ -412,6 +418,7 @@ def build_decoder_frontend(self) -> TransformerFrontend: def build_decoder_layer(self) -> TransformerDecoderLayer: """Build a Transformer decoder layer.""" self_attn = self.build_attention() + encoder_decoder_attn = self.build_attention() ffn = self.build_ffn() @@ -437,6 +444,7 @@ def build_ffn(self) -> FeedForwardNetwork: return StandardFeedForwardNetwork( self.config.model_dim, self.config.ffn_inner_dim, + bias=True, inner_activation=getattr(torch.nn, self.config.activation_fn)(), inner_dropout_p=self.config.activation_dropout_p, norm_order=self.transformer_normalize_order, @@ -458,18 +466,19 @@ def build_model(self) -> TransformerDecoderModel: decoder_frontend = self.build_decoder_frontend() final_proj = Linear( input_dim=self.config.model_dim, - output_dim=self.config.vocabulary_size, + output_dim=self.config.vocab_info.size, bias=False, ) model = TransformerDecoderModel( - decoder_frontend, decoder, final_proj, target_pad_idx=self.config.pad_idx + decoder_frontend, decoder, final_proj, self.config.vocab_info ) return model.to(device=self.device, dtype=self.dtype) def create_sonar_text_decoder_model( config: SonarTextDecoderConfig, + *, device: Optional[Device] = None, dtype: Optional[DataType] = None, ) -> TransformerDecoderModel: @@ -482,4 +491,4 @@ def create_sonar_text_decoder_model( :param dtype: The data type of module parameters and buffers. 
""" - return SonarTextDecoderBuilder(config, device, dtype).build_model() + return SonarTextDecoderBuilder(config, device=device, dtype=dtype).build_model() diff --git a/sonar/models/sonar_text/loader.py b/sonar/models/sonar_text/loader.py index 39ee02e..02d98fc 100644 --- a/sonar/models/sonar_text/loader.py +++ b/sonar/models/sonar_text/loader.py @@ -4,15 +4,14 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -from typing import Any, Dict, Mapping, final +from typing import Any, Mapping import torch -from fairseq2.assets import download_manager +from fairseq2.assets import asset_store, download_manager from fairseq2.models.nllb.loader import NllbTokenizerLoader from fairseq2.models.transformer import TransformerDecoderModel -from fairseq2.models.utils.checkpoint_loader import upgrade_fairseq_checkpoint -from fairseq2.models.utils.model_loader import ModelLoader -from overrides import override as finaloverride +from fairseq2.models.utils import ConfigLoader, ModelLoader +from fairseq2.models.utils.checkpoint import convert_fairseq_checkpoint from sonar.models.sonar_text.builder import ( SonarTextDecoderConfig, @@ -23,140 +22,137 @@ sonar_text_encoder_archs, ) from sonar.models.sonar_text.model import SonarTextTransformerEncoderModel -from sonar.store import asset_store -load_sonar_tokenizer = NllbTokenizerLoader(asset_store, download_manager) +def convert_sonar_text_encoder_checkpoint( + checkpoint: Mapping[str, Any], config: SonarTextEncoderConfig +) -> Mapping[str, Any]: + # Return directly if found fairseq2 attribute in state dict + if ( + "model" in checkpoint.keys() + and "encoder_frontend.embed.weight" in checkpoint["model"].keys() + ): + return checkpoint + + state_dict = checkpoint["state_dict"] + + try: + del state_dict["version"] + del state_dict["embed_positions._float_tensor"] + except: + pass + # del state_dict["decoder.version"] + + out_checkpoint = {"model": 
state_dict} + + key_map = { + r"layers\.([0-9]+)\.self_attn\.q_proj\.": r"encoder.layers.\1.self_attn.q_proj.", + r"layers\.([0-9]+)\.self_attn\.v_proj\.": r"encoder.layers.\1.self_attn.v_proj.", + r"layers\.([0-9]+)\.self_attn\.k_proj\.": r"encoder.layers.\1.self_attn.k_proj.", + r"layers\.([0-9]+)\.self_attn\.out_proj\.": r"encoder.layers.\1.self_attn.output_proj.", + r"layers\.([0-9]+)\.self_attn_layer_norm\.": r"encoder.layers.\1.self_attn_layer_norm.", + r"layers\.([0-9]+)\.fc1\.": r"encoder.layers.\1.ffn.inner_proj.", + r"layers\.([0-9]+)\.fc2\.": r"encoder.layers.\1.ffn.output_proj.", + r"layers\.([0-9]+)\.final_layer_norm\.": r"encoder.layers.\1.ffn_layer_norm.", + r"embed_tokens\.": r"encoder_frontend.embed.", + # fmt: on + } + + out_checkpoint = convert_fairseq_checkpoint(out_checkpoint, key_map) + + embeds = checkpoint["embed_tokens"].weight + # # The embedding positions of the control tokens do not match the + # # SentencePiece model of the tokenizer. + with torch.inference_mode(): + # (BOS, PAD, EOS, UNK) -> (PAD, UNK, BOS, EOS) + embeds[[0, 1, 2, 3]] = embeds[[1, 3, 0, 2]] + out_checkpoint["encoder_frontend.embed.weight"] = embeds + + return out_checkpoint + + +load_sonar_text_encoder_config = ConfigLoader[SonarTextEncoderConfig]( + asset_store, sonar_text_encoder_archs +) -@final -class SonarTextEncoderLoader( - ModelLoader[SonarTextTransformerEncoderModel, SonarTextEncoderConfig] -): - """Loads SonarEncoder models.""" - - @finaloverride - def _upgrade_checkpoint( - self, checkpoint: Mapping[str, Any], config: SonarTextEncoderConfig - ) -> Mapping[str, Any]: - # Return directly if found fairseq2 attribute in state dict - if ( - "model" in checkpoint.keys() - and "encoder_frontend.embed.weight" in checkpoint["model"].keys() - ): - return checkpoint - - state_dict = checkpoint["state_dict"] - - try: - del state_dict["version"] - del state_dict["embed_positions._float_tensor"] - except: - pass - # del state_dict["decoder.version"] - - out_checkpoint = 
{"model": state_dict} - out_checkpoint = upgrade_fairseq_checkpoint( - out_checkpoint, self._fairseq_key_map() - ) - embeds = checkpoint["embed_tokens"].weight - # # The embedding positions of the control tokens do not match the - # # SentencePiece model of the tokenizer. - with torch.inference_mode(): - # (BOS, PAD, EOS, UNK) -> (PAD, UNK, BOS, EOS) - embeds[[0, 1, 2, 3]] = embeds[[1, 3, 0, 2]] - out_checkpoint["encoder_frontend.embed.weight"] = embeds - - return out_checkpoint - - @staticmethod - def _fairseq_key_map() -> Dict[str, str]: - return { - r"layers\.([0-9]+)\.self_attn\.q_proj\.": r"encoder.layers.\1.self_attn.q_proj.", - r"layers\.([0-9]+)\.self_attn\.v_proj\.": r"encoder.layers.\1.self_attn.v_proj.", - r"layers\.([0-9]+)\.self_attn\.k_proj\.": r"encoder.layers.\1.self_attn.k_proj.", - r"layers\.([0-9]+)\.self_attn\.out_proj\.": r"encoder.layers.\1.self_attn.output_proj.", - r"layers\.([0-9]+)\.self_attn_layer_norm\.": r"encoder.layers.\1.self_attn_layer_norm.", - r"layers\.([0-9]+)\.fc1\.": r"encoder.layers.\1.ffn.inner_proj.", - r"layers\.([0-9]+)\.fc2\.": r"encoder.layers.\1.ffn.output_proj.", - r"layers\.([0-9]+)\.final_layer_norm\.": r"encoder.layers.\1.ffn_layer_norm.", - r"embed_tokens\.": r"encoder_frontend.embed.", - # fmt: on - } - - -load_sonar_text_encoder_model = SonarTextEncoderLoader( +load_sonar_text_encoder_model = ModelLoader[ + SonarTextTransformerEncoderModel, SonarTextEncoderConfig +]( asset_store, download_manager, + load_sonar_text_encoder_config, create_sonar_text_encoder_model, - sonar_text_encoder_archs, + convert_sonar_text_encoder_checkpoint, + restrict_checkpoints=False, ) -@final -class SonarTextDecoderLoader( - ModelLoader[TransformerDecoderModel, SonarTextDecoderConfig] -): - """Loads SonarEncoder models.""" - - @finaloverride - def _upgrade_checkpoint( - self, checkpoint: Mapping[str, Any], config: SonarTextDecoderConfig - ) -> Mapping[str, Any]: - # Return directly if found fairseq2 attribute in state dict - if ( - 
"model" in checkpoint.keys() - and "decoder_frontend.embed.weight" in checkpoint["model"].keys() - ): - return checkpoint - - state_dict = checkpoint["state_dict"] - try: - del state_dict["version"] - del state_dict["embed_positions._float_tensor"] - except: - pass - - out_checkpoint = {"model": state_dict} - out_checkpoint = upgrade_fairseq_checkpoint( - out_checkpoint, self._fairseq_key_map() - ) - embeds = out_checkpoint["model"]["decoder_frontend.embed.weight"] - # # The embedding positions of the control tokens do not match the - # # SentencePiece model of the tokenizer. - with torch.inference_mode(): - # (BOS, PAD, EOS, UNK) -> (PAD, UNK, BOS, EOS) - embeds[[0, 1, 2, 3]] = embeds[[1, 3, 0, 2]] - out_checkpoint["model"]["decoder_frontend.embed.weight"] = embeds - return out_checkpoint - - @staticmethod - def _fairseq_key_map() -> Dict[str, str]: - return { - r"layers\.([0-9]+)\.self_attn\.k_proj\.": r"decoder.layers.\1.self_attn.k_proj.", - r"layers\.([0-9]+)\.self_attn\.v_proj\.": r"decoder.layers.\1.self_attn.v_proj.", - r"layers\.([0-9]+)\.self_attn\.q_proj\.": r"decoder.layers.\1.self_attn.q_proj.", - r"layers\.([0-9]+)\.self_attn.out_proj\.": r"decoder.layers.\1.self_attn.output_proj.", - r"layers\.([0-9]+)\.self_attn_layer_norm\.": r"decoder.layers.\1.self_attn_layer_norm.", - r"layers\.([0-9]+).ffn\.inner_proj\.": r"decoder.layers.\1.ffn.inner_proj.", - r"layers\.([0-9]+).ffn\.output_proj\.": r"decoder.layers.\1.ffn.output_proj.", - r"layers\.([0-9]+)\.ffn_layer_norm\.": r"decoder.layers.\1.ffn_layer_norm.", - r"layers\.([0-9]+).encoder_attn\.k_proj\.": r"decoder.layers.\1.encoder_decoder_attn.k_proj.", - r"layers\.([0-9]+).encoder_attn\.v_proj\.": r"decoder.layers.\1.encoder_decoder_attn.v_proj.", - r"layers\.([0-9]+).encoder_attn\.q_proj\.": r"decoder.layers.\1.encoder_decoder_attn.q_proj.", - r"layers\.([0-9]+).encoder_attn\.out_proj\.": r"decoder.layers.\1.encoder_decoder_attn.output_proj.", - r"layers\.([0-9]+)\.encoder_attn_layer_norm\.": 
r"decoder.layers.\1.encoder_decoder_attn_layer_norm.", - r"layers\.([0-9]+)\.fc1\.": r"decoder.layers.\1.ffn.inner_proj.", - r"layers\.([0-9]+)\.fc2\.": r"decoder.layers.\1.ffn.output_proj.", - r"layers\.([0-9]+)\.final_layer_norm\.": r"decoder.layers.\1.ffn_layer_norm.", - r"output_projection.": r"final_proj.", - r"embed_tokens.": r"decoder_frontend.embed.", - r"layer_norm.": r"decoder.layer_norm.", - } - - -load_sonar_text_decoder_model = SonarTextDecoderLoader( +def convert_sonar_text_decoder_checkpoint( + checkpoint: Mapping[str, Any], config: SonarTextDecoderConfig +) -> Mapping[str, Any]: + # Return directly if found fairseq2 attribute in state dict + if ( + "model" in checkpoint.keys() + and "decoder_frontend.embed.weight" in checkpoint["model"].keys() + ): + return checkpoint + + state_dict = checkpoint["state_dict"] + try: + del state_dict["version"] + del state_dict["embed_positions._float_tensor"] + except: + pass + + out_checkpoint = {"model": state_dict} + + key_map = { + r"layers\.([0-9]+)\.self_attn\.k_proj\.": r"decoder.layers.\1.self_attn.k_proj.", + r"layers\.([0-9]+)\.self_attn\.v_proj\.": r"decoder.layers.\1.self_attn.v_proj.", + r"layers\.([0-9]+)\.self_attn\.q_proj\.": r"decoder.layers.\1.self_attn.q_proj.", + r"layers\.([0-9]+)\.self_attn.out_proj\.": r"decoder.layers.\1.self_attn.output_proj.", + r"layers\.([0-9]+)\.self_attn_layer_norm\.": r"decoder.layers.\1.self_attn_layer_norm.", + r"layers\.([0-9]+).ffn\.inner_proj\.": r"decoder.layers.\1.ffn.inner_proj.", + r"layers\.([0-9]+).ffn\.output_proj\.": r"decoder.layers.\1.ffn.output_proj.", + r"layers\.([0-9]+)\.ffn_layer_norm\.": r"decoder.layers.\1.ffn_layer_norm.", + r"layers\.([0-9]+).encoder_attn\.k_proj\.": r"decoder.layers.\1.encoder_decoder_attn.k_proj.", + r"layers\.([0-9]+).encoder_attn\.v_proj\.": r"decoder.layers.\1.encoder_decoder_attn.v_proj.", + r"layers\.([0-9]+).encoder_attn\.q_proj\.": r"decoder.layers.\1.encoder_decoder_attn.q_proj.", + 
r"layers\.([0-9]+).encoder_attn\.out_proj\.": r"decoder.layers.\1.encoder_decoder_attn.output_proj.", + r"layers\.([0-9]+)\.encoder_attn_layer_norm\.": r"decoder.layers.\1.encoder_decoder_attn_layer_norm.", + r"layers\.([0-9]+)\.fc1\.": r"decoder.layers.\1.ffn.inner_proj.", + r"layers\.([0-9]+)\.fc2\.": r"decoder.layers.\1.ffn.output_proj.", + r"layers\.([0-9]+)\.final_layer_norm\.": r"decoder.layers.\1.ffn_layer_norm.", + r"output_projection.": r"final_proj.", + r"embed_tokens.": r"decoder_frontend.embed.", + r"layer_norm.": r"decoder.layer_norm.", + } + + out_checkpoint = convert_fairseq_checkpoint(out_checkpoint, key_map) + + embeds = out_checkpoint["model"]["decoder_frontend.embed.weight"] + # # The embedding positions of the control tokens do not match the + # # SentencePiece model of the tokenizer. + with torch.inference_mode(): + # (BOS, PAD, EOS, UNK) -> (PAD, UNK, BOS, EOS) + embeds[[0, 1, 2, 3]] = embeds[[1, 3, 0, 2]] + out_checkpoint["model"]["decoder_frontend.embed.weight"] = embeds + return out_checkpoint + + +load_sonar_text_decoder_config = ConfigLoader[SonarTextDecoderConfig]( + asset_store, sonar_text_decoder_archs +) + +load_sonar_text_decoder_model = ModelLoader[ + TransformerDecoderModel, SonarTextDecoderConfig +]( asset_store, download_manager, + load_sonar_text_decoder_config, create_sonar_text_decoder_model, - sonar_text_decoder_archs, + convert_sonar_text_decoder_checkpoint, + restrict_checkpoints=False, ) + +load_sonar_tokenizer = NllbTokenizerLoader(asset_store, download_manager) diff --git a/sonar/models/sonar_text/model.py b/sonar/models/sonar_text/model.py index 746db56..0aab974 100644 --- a/sonar/models/sonar_text/model.py +++ b/sonar/models/sonar_text/model.py @@ -11,12 +11,12 @@ from fairseq2.models.sequence import SequenceBatch from fairseq2.models.transformer.frontend import TransformerFrontend from fairseq2.nn.normalization import LayerNorm +from fairseq2.nn.padding import PaddingMask, apply_padding_mask from 
fairseq2.nn.transformer import TransformerEncoder from overrides import final as finaloverride from torch import Tensor -from sonar.models import SonarEncoderModel, SonarEncoderOutput -from sonar.nn.utils import _neg_inf, compute_seq_length +from sonar.models.encoder_model import SonarEncoderModel, SonarEncoderOutput class Pooling(Enum): @@ -64,7 +64,7 @@ def __init__( @staticmethod def sentence_embedding_pooling( - seqs: Tensor, padding_mask: Optional[Tensor], pooling: Pooling + seqs: Tensor, padding_mask: Optional[PaddingMask], pooling: Pooling ) -> Tensor: """Deterministic pooling along sequence dimension to get a sentence representation Args: @@ -75,27 +75,29 @@ def sentence_embedding_pooling( Returns: Tensor: bs x model_dim """ - - if padding_mask is None: - padding_mask = torch.zeros(seqs.shape[:2], device=seqs.device) - if pooling == Pooling.LAST: - seq_length = compute_seq_length(padding_mask, _neg_inf) - sentence_embedding = seqs[ - [torch.arange(seq_length.shape[0]), (seq_length - 1).clip_(0)] - ] + if padding_mask is None: + sentence_embedding = seqs[:, -1] + else: + seq_lens = padding_mask.seq_lens + + sentence_embedding = seqs[ + [torch.arange(seq_lens.shape[0]), (seq_lens - 1).clip_(0)] + ] elif pooling == Pooling.MAX: - seqs = torch.clone(seqs) - seqs[padding_mask == _neg_inf] = _neg_inf + seqs = apply_padding_mask(seqs, padding_mask, pad_value=-torch.inf) sentence_embedding = seqs.max(dim=1).values elif pooling == Pooling.MEAN: - seqs = torch.clone(seqs) - seqs[padding_mask == _neg_inf] = 0 + seqs = apply_padding_mask(seqs, padding_mask, pad_value=0) sentence_embedding = seqs.sum(dim=1) - weights = 1.0 / ((padding_mask != _neg_inf).float().sum(dim=1) + 1e-7) - sentence_embedding = torch.einsum( - "i...,i ->i...", sentence_embedding, weights - ) + if padding_mask is None: + weights = 1.0 / (seqs.size(1) + 1e-7) + sentence_embedding = sentence_embedding * weights + else: + weights = 1.0 / (padding_mask.seq_lens.float() + 1e-7) + sentence_embedding = 
torch.einsum( + "i...,i->i...", sentence_embedding, weights + ) else: raise NotImplementedError(pooling) @@ -103,7 +105,7 @@ def sentence_embedding_pooling( @finaloverride def forward(self, batch: SequenceBatch) -> SonarEncoderOutput: - embed_seqs, padding_mask = self.encoder_frontend(batch.seqs, batch.seq_lens) + embed_seqs, padding_mask = self.encoder_frontend(batch.seqs, batch.padding_mask) encoded_seqs, _ = self.encoder(embed_seqs, padding_mask) diff --git a/sonar/models/sonar_translation/__init__.py b/sonar/models/sonar_translation/__init__.py index 8dc2b31..824d6ee 100644 --- a/sonar/models/sonar_translation/__init__.py +++ b/sonar/models/sonar_translation/__init__.py @@ -4,6 +4,12 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +from sonar.models.sonar_translation.builder import ( + create_sonar_speech_to_text_model as create_sonar_speech_to_text_model, +) +from sonar.models.sonar_translation.builder import ( + create_sonar_text_encoder_decoder_model as create_sonar_text_encoder_decoder_model, +) from sonar.models.sonar_translation.model import ( SonarEncoderDecoderModel as SonarEncoderDecoderModel, ) diff --git a/sonar/models/sonar_translation/builder.py b/sonar/models/sonar_translation/builder.py index b0e490b..c37caf0 100644 --- a/sonar/models/sonar_translation/builder.py +++ b/sonar/models/sonar_translation/builder.py @@ -4,7 +4,6 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
- from typing import Optional from fairseq2.typing import DataType, Device @@ -25,6 +24,7 @@ def create_sonar_text_encoder_decoder_model( encoder_config: SonarTextEncoderConfig, decoder_config: SonarTextDecoderConfig, + *, device: Optional[Device] = None, dtype: Optional[DataType] = None, ) -> SonarEncoderDecoderModel: @@ -39,8 +39,12 @@ def create_sonar_text_encoder_decoder_model( :param dtype: The data type of module parameters and buffers. """ - encoder = SonarTextEncoderBuilder(encoder_config, device, dtype).build_model() - decoder = SonarTextDecoderBuilder(decoder_config, device, dtype).build_model() + encoder = SonarTextEncoderBuilder( + encoder_config, device=device, dtype=dtype + ).build_model() + decoder = SonarTextDecoderBuilder( + decoder_config, device=device, dtype=dtype + ).build_model() return SonarEncoderDecoderModel(encoder=encoder, decoder=decoder).to( device=device, dtype=dtype @@ -50,6 +54,7 @@ def create_sonar_text_encoder_decoder_model( def create_sonar_speech_to_text_model( encoder_config: SonarSpeechEncoderConfig, decoder_config: SonarTextDecoderConfig, + *, device: Optional[Device] = None, dtype: Optional[DataType] = None, ) -> SonarEncoderDecoderModel: @@ -64,8 +69,12 @@ def create_sonar_speech_to_text_model( :param dtype: The data type of module parameters and buffers. 
""" - encoder = create_sonar_speech_encoder_model(encoder_config, device, dtype) - decoder = SonarTextDecoderBuilder(decoder_config, device, dtype).build_model() + encoder = create_sonar_speech_encoder_model( + encoder_config, device=device, dtype=dtype + ) + decoder = SonarTextDecoderBuilder( + decoder_config, device=device, dtype=dtype + ).build_model() return SonarEncoderDecoderModel(encoder=encoder, decoder=decoder).to( device=device, dtype=dtype diff --git a/sonar/models/sonar_translation/model.py b/sonar/models/sonar_translation/model.py index 2b7c4ee..9da8dfa 100644 --- a/sonar/models/sonar_translation/model.py +++ b/sonar/models/sonar_translation/model.py @@ -9,9 +9,10 @@ from fairseq2.models.encoder_decoder import EncoderDecoderModel from fairseq2.models.sequence import SequenceBatch, SequenceModelOutput from fairseq2.models.transformer import TransformerDecoderModel +from fairseq2.nn.padding import PaddingMask from torch import Tensor -from sonar.models import SonarEncoderModel +from sonar.models.encoder_model import SonarEncoderModel @final @@ -30,7 +31,7 @@ def __init__( encoder: SonarEncoderModel, decoder: TransformerDecoderModel, ) -> None: - super().__init__(model_dim=encoder.model_dim) + super().__init__(encoder.model_dim, decoder.vocab_info) if encoder.model_dim != decoder.model_dim: raise ValueError( f"`model_dim` of `encoder` and `model_dim` of `decoder` must be equal, but are {encoder.model_dim} and {decoder.model_dim} instead." 
@@ -39,27 +40,33 @@ def __init__( self.decoder = decoder def encode( - self, seqs: Tensor, seq_lens: Optional[Tensor] - ) -> Tuple[Tensor, Optional[Tensor]]: - batch = SequenceBatch(seqs, seq_lens) + self, seqs: Tensor, padding_mask: Optional[PaddingMask] + ) -> Tuple[Tensor, Optional[PaddingMask]]: + batch = SequenceBatch(seqs, padding_mask) sonar_output_encoder = self.encoder(batch) return (sonar_output_encoder.sentence_embeddings.unsqueeze(1), None) def decode( self, seqs: Tensor, - seq_lens: Optional[Tensor], + padding_mask: Optional[PaddingMask], encoder_output: Tensor, - encoder_padding_mask: Optional[Tensor], + encoder_padding_mask: Optional[PaddingMask], state_bag=None, - ) -> Tuple[Tensor, Optional[Tensor]]: - seqs, padding_mask = self.decoder.decoder_frontend(seqs, seq_lens, state_bag) + ) -> Tuple[Tensor, Optional[PaddingMask]]: + seqs, padding_mask = self.decoder.decoder_frontend( + seqs, padding_mask, state_bag=state_bag + ) return self.decoder.decoder( # type: ignore[no-any-return] - seqs, padding_mask, encoder_output, encoder_padding_mask, state_bag + seqs, + padding_mask, + encoder_output, + encoder_padding_mask, + state_bag=state_bag, ) def project( - self, decoder_output: Tensor, decoder_padding_mask: Optional[Tensor] + self, decoder_output: Tensor, decoder_padding_mask: Optional[PaddingMask] ) -> SequenceModelOutput: return self.decoder.project(decoder_output, decoder_padding_mask) diff --git a/sonar/nn/encoder_pooler.py b/sonar/nn/encoder_pooler.py index 094755c..5ba6d36 100644 --- a/sonar/nn/encoder_pooler.py +++ b/sonar/nn/encoder_pooler.py @@ -10,6 +10,7 @@ import torch from fairseq2.models.transformer.frontend import TransformerFrontend from fairseq2.nn import Linear +from fairseq2.nn.padding import PaddingMask from fairseq2.nn.transformer.decoder import TransformerDecoder from fairseq2.typing import Device from overrides import override @@ -24,7 +25,7 @@ class EncoderOutputPooler(Module): def __call__( self, encoder_output: Tensor, - 
encoder_padding_mask: Optional[Tensor], + encoder_padding_mask: Optional[PaddingMask], ) -> Tensor: """Apply pooling on encoder_output @@ -70,7 +71,7 @@ def __init__( def __call__( self, encoder_output: Tensor, - encoder_padding_mask: Optional[Tensor], + encoder_padding_mask: Optional[PaddingMask], ) -> Tensor: seqs = self._get_pooling_tokens(encoder_output.shape[0], encoder_output.device) diff --git a/sonar/store/__init__.py b/sonar/store/__init__.py deleted file mode 100644 index 8ee97f8..0000000 --- a/sonar/store/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from pathlib import Path - -from fairseq2.assets.card_storage import LocalAssetCardStorage -from fairseq2.assets.store import AssetStore, DefaultAssetStore - - -def create_default_asset_store() -> AssetStore: - pathname = Path(__file__).parent.joinpath("cards") - card_storage = LocalAssetCardStorage(pathname) - - return DefaultAssetStore(card_storage) - - -asset_store = create_default_asset_store() diff --git a/sonar/store/cards/blaser_2_0_qe.yaml b/sonar/store/cards/blaser_2_0_qe.yaml deleted file mode 100644 index 555a58c..0000000 --- a/sonar/store/cards/blaser_2_0_qe.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
-name: blaser_2_0_qe -model_type: blaser -model_arch: basic_qe -checkpoint: "https://huggingface.co/facebook/blaser-2.0-qe/resolve/main/model.pt" diff --git a/sonar/store/cards/sonar_speech_encoder_arb.yaml b/sonar/store/cards/sonar_speech_encoder_arb.yaml deleted file mode 100644 index 316e2ba..0000000 --- a/sonar/store/cards/sonar_speech_encoder_arb.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.arb.pt" -default_lang: arb -langs: - - arb \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_ben.yaml b/sonar/store/cards/sonar_speech_encoder_ben.yaml deleted file mode 100644 index fdda546..0000000 --- a/sonar/store/cards/sonar_speech_encoder_ben.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.ben.pt" -default_lang: ben -langs: - - ben \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_cat.yaml b/sonar/store/cards/sonar_speech_encoder_cat.yaml deleted file mode 100644 index ba41beb..0000000 --- a/sonar/store/cards/sonar_speech_encoder_cat.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.cat.pt" -default_lang: cat -langs: - - cat \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_ces.yaml b/sonar/store/cards/sonar_speech_encoder_ces.yaml deleted file mode 100644 index dae7890..0000000 --- a/sonar/store/cards/sonar_speech_encoder_ces.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.ces.pt" -default_lang: ces -langs: - - ces \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_cmn.yaml b/sonar/store/cards/sonar_speech_encoder_cmn.yaml deleted file mode 100644 index ee6b445..0000000 --- a/sonar/store/cards/sonar_speech_encoder_cmn.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.cmn.pt" -default_lang: cmn -langs: - - cmn \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_cym.yaml b/sonar/store/cards/sonar_speech_encoder_cym.yaml deleted file mode 100644 index 5a03010..0000000 --- a/sonar/store/cards/sonar_speech_encoder_cym.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.cym.pt" -default_lang: cym -langs: - - cym \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_dan.yaml b/sonar/store/cards/sonar_speech_encoder_dan.yaml deleted file mode 100644 index 931d1f1..0000000 --- a/sonar/store/cards/sonar_speech_encoder_dan.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.dan.pt" -default_lang: dan -langs: - - dan \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_deu.yaml b/sonar/store/cards/sonar_speech_encoder_deu.yaml deleted file mode 100644 index 535eb3c..0000000 --- a/sonar/store/cards/sonar_speech_encoder_deu.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.deu.pt" -default_lang: deu -langs: - - deu \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_eng.yaml b/sonar/store/cards/sonar_speech_encoder_eng.yaml deleted file mode 100644 index 49da3e7..0000000 --- a/sonar/store/cards/sonar_speech_encoder_eng.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. 
and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.eng.pt" -default_lang: eng -langs: - - eng \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_est.yaml b/sonar/store/cards/sonar_speech_encoder_est.yaml deleted file mode 100644 index 3a2207c..0000000 --- a/sonar/store/cards/sonar_speech_encoder_est.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.est.pt" -default_lang: est -langs: - - est \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_fin.yaml b/sonar/store/cards/sonar_speech_encoder_fin.yaml deleted file mode 100644 index 9712532..0000000 --- a/sonar/store/cards/sonar_speech_encoder_fin.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.fin.pt" -default_lang: fin -langs: - - fin \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_fra.yaml b/sonar/store/cards/sonar_speech_encoder_fra.yaml deleted file mode 100644 index c1cd141..0000000 --- a/sonar/store/cards/sonar_speech_encoder_fra.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.fra.pt" -default_lang: fra -langs: - - fra \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_hin.yaml b/sonar/store/cards/sonar_speech_encoder_hin.yaml deleted file mode 100644 index 3f8afde..0000000 --- a/sonar/store/cards/sonar_speech_encoder_hin.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.hin.pt" -default_lang: hin -langs: - - hin \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_ind.yaml b/sonar/store/cards/sonar_speech_encoder_ind.yaml deleted file mode 100644 index bccd5fb..0000000 --- a/sonar/store/cards/sonar_speech_encoder_ind.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.ind.pt" -default_lang: ind -langs: - - ind \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_ita.yaml b/sonar/store/cards/sonar_speech_encoder_ita.yaml deleted file mode 100644 index d1794aa..0000000 --- a/sonar/store/cards/sonar_speech_encoder_ita.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.ita.pt" -default_lang: ita -langs: - - ita \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_jpn.yaml b/sonar/store/cards/sonar_speech_encoder_jpn.yaml deleted file mode 100644 index b903244..0000000 --- a/sonar/store/cards/sonar_speech_encoder_jpn.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.jpn.pt" -default_lang: jpn -langs: - - jpn \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_kan.yaml b/sonar/store/cards/sonar_speech_encoder_kan.yaml deleted file mode 100644 index 3fc0888..0000000 --- a/sonar/store/cards/sonar_speech_encoder_kan.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. 
and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.kan.pt" -default_lang: kan -langs: - - kan \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_kor.yaml b/sonar/store/cards/sonar_speech_encoder_kor.yaml deleted file mode 100644 index 2c6b059..0000000 --- a/sonar/store/cards/sonar_speech_encoder_kor.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.kor.pt" -default_lang: kor -langs: - - kor \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_mlt.yaml b/sonar/store/cards/sonar_speech_encoder_mlt.yaml deleted file mode 100644 index f52866d..0000000 --- a/sonar/store/cards/sonar_speech_encoder_mlt.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.mlt.pt" -default_lang: mlt -langs: - - mlt \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_nld.yaml b/sonar/store/cards/sonar_speech_encoder_nld.yaml deleted file mode 100644 index af0e0d8..0000000 --- a/sonar/store/cards/sonar_speech_encoder_nld.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.nld.pt" -default_lang: nld -langs: - - nld \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_pes.yaml b/sonar/store/cards/sonar_speech_encoder_pes.yaml deleted file mode 100644 index 01ea15d..0000000 --- a/sonar/store/cards/sonar_speech_encoder_pes.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.pes.pt" -default_lang: pes -langs: - - pes \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_pol.yaml b/sonar/store/cards/sonar_speech_encoder_pol.yaml deleted file mode 100644 index a9fe352..0000000 --- a/sonar/store/cards/sonar_speech_encoder_pol.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.pol.pt" -default_lang: pol -langs: - - pol \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_por.yaml b/sonar/store/cards/sonar_speech_encoder_por.yaml deleted file mode 100644 index 037bb74..0000000 --- a/sonar/store/cards/sonar_speech_encoder_por.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.por.pt" -default_lang: por -langs: - - por \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_ron.yaml b/sonar/store/cards/sonar_speech_encoder_ron.yaml deleted file mode 100644 index 75ce6ac..0000000 --- a/sonar/store/cards/sonar_speech_encoder_ron.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.ron.pt" -default_lang: ron -langs: - - ron \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_rus.yaml b/sonar/store/cards/sonar_speech_encoder_rus.yaml deleted file mode 100644 index 7fcf958..0000000 --- a/sonar/store/cards/sonar_speech_encoder_rus.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. 
and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.rus.pt" -default_lang: rus -langs: - - rus \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_slk.yaml b/sonar/store/cards/sonar_speech_encoder_slk.yaml deleted file mode 100644 index e982304..0000000 --- a/sonar/store/cards/sonar_speech_encoder_slk.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.slk.pt" -default_lang: slk -langs: - - slk \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_spa.yaml b/sonar/store/cards/sonar_speech_encoder_spa.yaml deleted file mode 100644 index 2071da7..0000000 --- a/sonar/store/cards/sonar_speech_encoder_spa.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.spa.pt" -default_lang: spa -langs: - - spa \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_swh.yaml b/sonar/store/cards/sonar_speech_encoder_swh.yaml deleted file mode 100644 index baf2084..0000000 --- a/sonar/store/cards/sonar_speech_encoder_swh.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.swh.pt" -default_lang: swh -langs: - - swh \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_tam.yaml b/sonar/store/cards/sonar_speech_encoder_tam.yaml deleted file mode 100644 index 64710df..0000000 --- a/sonar/store/cards/sonar_speech_encoder_tam.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.tam.pt" -default_lang: tam -langs: - - tam \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_tel.yaml b/sonar/store/cards/sonar_speech_encoder_tel.yaml deleted file mode 100644 index f7db288..0000000 --- a/sonar/store/cards/sonar_speech_encoder_tel.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.tel.pt" -default_lang: tel -langs: - - tel \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_tgl.yaml b/sonar/store/cards/sonar_speech_encoder_tgl.yaml deleted file mode 100644 index 92975d8..0000000 --- a/sonar/store/cards/sonar_speech_encoder_tgl.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.tgl.pt" -default_lang: tgl -langs: - - tgl \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_tha.yaml b/sonar/store/cards/sonar_speech_encoder_tha.yaml deleted file mode 100644 index 28d8abb..0000000 --- a/sonar/store/cards/sonar_speech_encoder_tha.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.tha.pt" -default_lang: tha -langs: - - tha \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_tur.yaml b/sonar/store/cards/sonar_speech_encoder_tur.yaml deleted file mode 100644 index 1f5c13e..0000000 --- a/sonar/store/cards/sonar_speech_encoder_tur.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. 
and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.tur.pt" -default_lang: tur -langs: - - tur \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_ukr.yaml b/sonar/store/cards/sonar_speech_encoder_ukr.yaml deleted file mode 100644 index 3fa6dee..0000000 --- a/sonar/store/cards/sonar_speech_encoder_ukr.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.ukr.pt" -default_lang: ukr -langs: - - ukr \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_urd.yaml b/sonar/store/cards/sonar_speech_encoder_urd.yaml deleted file mode 100644 index 18445ef..0000000 --- a/sonar/store/cards/sonar_speech_encoder_urd.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.urd.pt" -default_lang: urd -langs: - - urd \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_uzn.yaml b/sonar/store/cards/sonar_speech_encoder_uzn.yaml deleted file mode 100644 index c6e084b..0000000 --- a/sonar/store/cards/sonar_speech_encoder_uzn.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.uzn.pt" -default_lang: uzn -langs: - - uzn \ No newline at end of file diff --git a/sonar/store/cards/sonar_speech_encoder_vie.yaml b/sonar/store/cards/sonar_speech_encoder_vie.yaml deleted file mode 100644 index d8a79ff..0000000 --- a/sonar/store/cards/sonar_speech_encoder_vie.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -model_name: sonar_speech_encoder -model_type: sonar_speech -model_arch: non_english -checkpoint: "https://dl.fbaipublicfiles.com/SONAR/spenc.v3ap.vie.pt" -default_lang: vie -langs: - - vie \ No newline at end of file diff --git a/tests/integration_tests/test_laser2_text.py b/tests/integration_tests/test_laser2_text.py index c173161..8e7690b 100644 --- a/tests/integration_tests/test_laser2_text.py +++ b/tests/integration_tests/test_laser2_text.py @@ -4,8 +4,10 @@ # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. 
+import os import tempfile +import pytest import torch from fairseq2.data import Collater, text from torch.testing import assert_close @@ -22,6 +24,7 @@ ] +@pytest.mark.skipif(not os.getenv("FAIR_ENV_CLUSTER"), reason="internal tests in FAIR") def test_load_laser2_text() -> None: model = load_laser2_model( "laser2_text_encoder", device=device, progress=False @@ -36,7 +39,7 @@ def test_load_laser2_text() -> None: text.read_text(tmp.name, rtrim=True, ltrim=True, memory_map=True) .map(encoder) .bucket(len(sentences), drop_remainder=True) - .map(Collater(pad_idx=1)) + .map(Collater(pad_value=1)) .and_return() ) tokenized_sentences = next(iter(pipeline)) diff --git a/tests/integration_tests/test_loading_multilang_speech_encoder.py b/tests/integration_tests/test_loading_multilang_speech_encoder.py deleted file mode 100644 index d1b18c6..0000000 --- a/tests/integration_tests/test_loading_multilang_speech_encoder.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates -# All rights reserved. -# -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. 
- -import glob -from pathlib import Path - -import pytest -import torch - -from sonar.models.sonar_speech.loader import load_sonar_speech_model - - -@pytest.mark.skip(reason="loading all models could take a long time") -def test_load_sonar_speech_model() -> None: - list_of_files = glob.glob("../../sonar/store/cards/sonar_speech_encoder_*.yaml") - assert len(list_of_files) == 37 - for file in list_of_files: - try: - load_sonar_speech_model(Path(file).stem, device=torch.device("cpu")) - except Exception as model_loading_exception: - raise ValueError( - f"Failed to load model {Path(file).stem}", str(model_loading_exception) - ) diff --git a/tests/integration_tests/test_text_sonar.py b/tests/integration_tests/test_text_sonar.py index c203da1..560eebd 100644 --- a/tests/integration_tests/test_text_sonar.py +++ b/tests/integration_tests/test_text_sonar.py @@ -56,7 +56,7 @@ def test_text_decoder_sonar(self) -> None: decoder = self.text2text.model.decoder dummy_prev_output_tokens = torch.Tensor([[3, 333]]).int() seqs, padding_mask = decoder.decoder_frontend( - dummy_prev_output_tokens, seq_lens=None + dummy_prev_output_tokens, padding_mask=None ) decoder_output, decoder_padding_mask = decoder.decoder( diff --git a/tests/unit_tests/test_sonar_pooling.py b/tests/unit_tests/test_sonar_pooling.py index f8e27b1..c4aeb99 100644 --- a/tests/unit_tests/test_sonar_pooling.py +++ b/tests/unit_tests/test_sonar_pooling.py @@ -5,16 +5,16 @@ # LICENSE file in the root directory of this source tree. 
import torch +from fairseq2.nn.padding import PaddingMask from torch.testing import assert_close # type: ignore -from sonar.models.sonar_text.model import _neg_inf # type: ignore from sonar.models.sonar_text.model import Pooling, SonarTextTransformerEncoderModel pooling_method = SonarTextTransformerEncoderModel.sentence_embedding_pooling def test_pooling_max() -> None: - padding_mask = torch.Tensor([[0, 0, _neg_inf], [0, _neg_inf, _neg_inf]]) + padding_mask = PaddingMask(torch.tensor([2, 1]), batch_seq_len=3) seqs = torch.Tensor( [[[7, 2], [3, 4], [10, 20]], [[-1, -2], [100, 1000], [-10, -20]]] ) @@ -27,7 +27,7 @@ def test_pooling_max() -> None: def test_pooling_mean() -> None: - padding_mask = torch.Tensor([[0, 0, _neg_inf], [0, _neg_inf, _neg_inf]]) + padding_mask = PaddingMask(torch.tensor([2, 1]), batch_seq_len=3) seqs = torch.Tensor( [[[7, 2], [3, 4], [10, 20]], [[-1, -2], [100, 1000], [-10, -20]]] ) @@ -40,7 +40,7 @@ def test_pooling_mean() -> None: def test_pooling_last() -> None: - padding_mask = torch.Tensor([[0, 0, _neg_inf], [0, _neg_inf, _neg_inf]]) + padding_mask = PaddingMask(torch.tensor([2, 1]), batch_seq_len=3) seqs = torch.Tensor( [[[7, 2], [3, 4], [10, 20]], [[-1, -2], [100, 1000], [-10, -20]]] ) diff --git a/tests/unit_tests/test_sonar_speech.py b/tests/unit_tests/test_sonar_speech.py new file mode 100644 index 0000000..4ae7e66 --- /dev/null +++ b/tests/unit_tests/test_sonar_speech.py @@ -0,0 +1,47 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +from pathlib import Path + +import pytest +import torch +import torchaudio # type: ignore[import] + +from sonar.inference_pipelines.speech import SpeechToEmbeddingModelPipeline + + +@pytest.fixture(scope="module") +def encoder(): + # request.param: whether the encoder is based on a quantized model (fp16) or not + device = torch.device("cpu") + + return SpeechToEmbeddingModelPipeline( + encoder="sonar_speech_encoder_eng", + device=device, + fbank_dtype=torch.float32, + ) + + +def test_speech_embedding_with_zeros_input(encoder): + audio = torch.zeros(1, 175920) + embedding = encoder.predict([audio]) + assert embedding.shape == torch.Size([1, 1024]) + + +def test_speech_embedding_with_waveform_input(encoder): + fake_audio = torch.rand(1, 175920) + embedding = encoder.predict([fake_audio]) + assert embedding.shape == torch.Size([1, 1024]) + + +# Parsing audio within sonar does not support fp16 audio decoding yet +def test_speech_embedding_pipeline_with_audio_files(tmp_path: Path, encoder): + print(torchaudio.list_audio_backends()) + fake_audio = torch.rand(1, 175920) + audio_file = tmp_path / "audio.wav" + torchaudio.save(audio_file, fake_audio, 16000) + embedding = encoder.predict([str(audio_file.resolve())]) + assert embedding.shape == torch.Size([1, 1024])